# libraries that need to be installed and loaded
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 3.0.0 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(mice)
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggplot2)
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(lattice)
library(caret)
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(monomvn)
## Loading required package: pls
##
## Attaching package: 'pls'
## The following object is masked from 'package:caret':
##
## R2
## The following object is masked from 'package:stats':
##
## loadings
## Loading required package: lars
## Loaded lars 1.2
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(ineq)
library(car)
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(knitr)
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:Matrix':
##
## expand
## The following objects are masked from 'package:reshape2':
##
## colsplit, melt, recast
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
Data Preparation
# read in data
require(haven)
## Loading required package: haven
sashhinc=read_sas("hhinc_10.sas7bdat")
# let's make this more interpretable by letting 1989 be year 0
# also let's convert income to thousands of yuan
sashhinc$cyear=sashhinc$WAVE-1989
sashhinc$thousands=sashhinc$hhincpc_cpi/1000
# replace NAs w/ null
sashhinc[is.na(sashhinc)] = 0
sashhinc_trimmed = sashhinc
# shift variables to positive in the case we want to use a log transform
sashhinc$thousands_shifted=sashhinc$thousands - (min(sashhinc$thousands)-0.01)
# change to categorical variables
sashhinc$urban <- as.factor(sashhinc$urban)
sashhinc$hhid <- as.factor(sashhinc$hhid)
sashhinc$t1 <- as.factor(sashhinc$t1)
## .1% removed- put in sashhinc_trimmed df
lower = quantile(sashhinc$thousands, 0.001)
upper = quantile(sashhinc$thousands, 0.999)
sashhinc_trimmed = subset(sashhinc, lower < thousands & thousands < upper)
sashhinc_trimmed$thousands_shifted=sashhinc_trimmed$thousands - (min(sashhinc_trimmed$thousands)-0.01)
# take a peek at dataset
colnames(sashhinc)
## [1] "WAVE" "hhid" "HHBUS"
## [4] "HHBUSimp" "HHFARM" "HHFARMimp"
## [7] "HHFISH" "HHFISHimp" "HHGARDimp"
## [10] "hhgard" "HHLVST" "HHLVSTimp"
## [13] "HHOTHR" "HHOTHRimp" "HHSUB"
## [16] "HHSUBimp" "HHRETIRE" "HHRETIREimp"
## [19] "hhNRwage" "HHNRWAGEimp" "HHINC"
## [22] "hhincgross" "hhexpense" "HHINCimp"
## [25] "commid" "t1" "urban"
## [28] "T6_A" "T6_B" "hhsize"
## [31] "hhinc_pc" "CPI1988" "CPI2015"
## [34] "index_new" "index_old" "hhinc_cpi"
## [37] "hhincpc_cpi" "hhincgross_cpi" "hhexpense_cpi"
## [40] "cyear" "thousands" "thousands_shifted"
summary(sashhinc)
## WAVE hhid HHBUS HHBUSimp
## Min. :1989 321102003: 10 Min. :-564000 Min. :0.00000
## 1st Qu.:1997 321102006: 10 1st Qu.: 0 1st Qu.:0.00000
## Median :2004 321102007: 10 Median : 0 Median :0.00000
## Mean :2003 321102009: 10 Mean : 2227 Mean :0.02936
## 3rd Qu.:2011 321102010: 10 3rd Qu.: 0 3rd Qu.:0.00000
## Max. :2015 321102012: 10 Max. : 876000 Max. :1.00000
## (Other) :43611
## HHFARM HHFARMimp HHFISH HHFISHimp
## Min. :-147522 Min. :0.0000 Min. :-32500.00 Min. :0
## 1st Qu.: 0 1st Qu.:0.0000 1st Qu.: 0.00 1st Qu.:0
## Median : 0 Median :0.0000 Median : 0.00 Median :0
## Mean : 1710 Mean :0.1131 Mean : 51.85 Mean :0
## 3rd Qu.: 1500 3rd Qu.:0.0000 3rd Qu.: 0.00 3rd Qu.:0
## Max. :1099564 Max. :1.0000 Max. : 99000.00 Max. :0
##
## HHGARDimp hhgard HHLVST HHLVSTimp
## Min. :0.00000 Min. :-76400 Min. :-100199.0 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.: 0 1st Qu.: 0.0 1st Qu.:0.00000
## Median :0.00000 Median : 0 Median : 0.0 Median :0.00000
## Mean :0.05615 Mean : 2054 Mean : 259.6 Mean :0.05461
## 3rd Qu.:0.00000 3rd Qu.: 880 3rd Qu.: 0.0 3rd Qu.:0.00000
## Max. :1.00000 Max. :846000 Max. : 138957.7 Max. :1.00000
##
## HHOTHR HHOTHRimp HHSUB HHSUBimp
## Min. : 0 Min. :0.00000 Min. : 0.0 Min. :0.00000
## 1st Qu.: 0 1st Qu.:0.00000 1st Qu.: 0.0 1st Qu.:0.00000
## Median : 0 Median :0.00000 Median : 0.0 Median :0.00000
## Mean : 2050 Mean :0.02521 Mean : 381.5 Mean :0.06487
## 3rd Qu.: 1000 3rd Qu.:0.00000 3rd Qu.: 117.5 3rd Qu.:0.00000
## Max. :1019999 Max. :1.00000 Max. :216000.0 Max. :1.00000
##
## HHRETIRE HHRETIREimp hhNRwage HHNRWAGEimp
## Min. : 0 Min. :0.000000 Min. : 0 Min. :0.00000
## 1st Qu.: 0 1st Qu.:0.000000 1st Qu.: 0 1st Qu.:0.00000
## Median : 0 Median :0.000000 Median : 500 Median :0.00000
## Mean : 4163 Mean :0.004809 Mean : 12537 Mean :0.04206
## 3rd Qu.: 0 3rd Qu.:0.000000 3rd Qu.: 10000 3rd Qu.:0.00000
## Max. :180000 Max. :1.000000 Max. :4800000 Max. :1.00000
##
## HHINC hhincgross hhexpense HHINCimp
## Min. :-564000 Min. : 0 Min. : 0 Min. :0.0000
## 1st Qu.: 4251 1st Qu.: 5110 1st Qu.: 0 1st Qu.:0.0000
## Median : 11160 Median : 12800 Median : 300 Median :0.0000
## Mean : 25434 Mean : 28052 Mean : 2597 Mean :0.3009
## 3rd Qu.: 28844 3rd Qu.: 31750 3rd Qu.: 1669 3rd Qu.:1.0000
## Max. :4800000 Max. :4800000 Max. :1116000 Max. :1.0000
##
## commid t1 urban T6_A T6_B
## Min. :111101 52 : 4938 0:28483 Min. : 0.000 Min. :0.0000
## 1st Qu.:322101 45 : 4924 1:15188 1st Qu.: 2.000 1st Qu.:0.0000
## Median :412301 42 : 4733 Median : 3.000 Median :0.0000
## Mean :379669 41 : 4719 Mean : 3.249 Mean :0.3117
## 3rd Qu.:432404 43 : 4711 3rd Qu.: 4.000 3rd Qu.:0.0000
## Max. :552304 32 : 4701 Max. :14.000 Max. :9.0000
## (Other):14945
## hhsize hhinc_pc CPI1988 CPI2015
## Min. : 0.000 Min. :-188000 Min. :0.930 Min. :0.27
## 1st Qu.: 2.000 1st Qu.: 1208 1st Qu.:1.810 1st Qu.:0.53
## Median : 3.000 Median : 3325 Median :2.370 Median :0.69
## Mean : 3.561 Mean : 8494 Mean :2.321 Mean :0.68
## 3rd Qu.: 4.000 3rd Qu.: 9467 3rd Qu.:2.860 3rd Qu.:0.84
## Max. :15.000 Max. :1200000 Max. :4.530 Max. :1.33
##
## index_new index_old hhinc_cpi hhincpc_cpi
## Min. :0.27 Min. :0.930 Min. :-679518 Min. :-226506
## 1st Qu.:0.53 1st Qu.:1.810 1st Qu.: 8833 1st Qu.: 2502
## Median :0.69 Median :2.370 Median : 18132 Median : 5344
## Mean :0.68 Mean :2.321 Mean : 31353 Mean : 10178
## 3rd Qu.:0.84 3rd Qu.:2.860 3rd Qu.: 37375 3rd Qu.: 11959
## Max. :1.33 Max. :4.530 Max. :4528302 Max. :1132076
##
## hhincgross_cpi hhexpense_cpi cyear thousands
## Min. : 0 Min. : 0.0 Min. : 0.00 Min. :-226.506
## 1st Qu.: 10753 1st Qu.: 0.0 1st Qu.: 8.00 1st Qu.: 2.502
## Median : 20703 Median : 571.4 Median :15.00 Median : 5.344
## Mean : 35207 Mean : 3827.3 Mean :13.83 Mean : 10.178
## 3rd Qu.: 41357 3rd Qu.: 3135.3 3rd Qu.:22.00 3rd Qu.: 11.959
## Max. :4528302 Max. :1344578.3 Max. :26.00 Max. :1132.075
##
## thousands_shifted
## Min. : 0.01
## 1st Qu.: 229.02
## Median : 231.86
## Mean : 236.69
## 3rd Qu.: 238.47
## Max. :1358.59
##
# select variables we need for df
df = dplyr::select(sashhinc,
hhid,
WAVE,
hhinc_cpi,
hhincpc_cpi,
HHBUS,
HHBUSimp,
HHFARM,
HHFARMimp,
HHFISH,
HHFISHimp,
hhgard,
HHGARDimp,
HHLVST,
HHLVSTimp,
hhNRwage,
HHNRWAGEimp,
HHOTHR,
HHOTHRimp,
HHRETIRE,
HHRETIREimp,
HHSUB,
HHSUBimp,
t1,
urban,
thousands,
cyear,
thousands_shifted)
names(df)<- toupper(names(df))
#add primary income source variable
df <- df %>%
mutate(Primary = colnames(df[, c("HHBUS", "HHFARM", "HHFISH", "HHGARD", "HHLVST", "HHOTHR", "HHRETIRE", "HHSUB")])[apply(df[, c("HHBUS", "HHFARM", "HHFISH", "HHGARD", "HHLVST", "HHOTHR", "HHRETIRE", "HHSUB")], 1, which.max)])
# select variables we need for df w/o outliers
df_trimmed = dplyr::select(sashhinc_trimmed,
hhid,
WAVE,
hhinc_cpi,
hhincpc_cpi,
HHBUS,
HHBUSimp,
HHFARM,
HHFARMimp,
HHFISH,
HHFISHimp,
hhgard,
HHGARDimp,
HHLVST,
HHLVSTimp,
hhNRwage,
HHNRWAGEimp,
HHOTHR,
HHOTHRimp,
HHRETIRE,
HHRETIREimp,
HHSUB,
HHSUBimp,
t1,
urban,
thousands,
cyear,
thousands_shifted)
#add primary income source variable
df_trimmed <- df_trimmed %>%
mutate(Primary = colnames(df[, c("HHBUS", "HHFARM", "HHFISH", "HHGARD", "HHLVST", "HHOTHR", "HHRETIRE", "HHSUB")])[apply(df_trimmed[, c("HHBUS", "HHFARM", "HHFISH", "hhgard", "HHLVST", "HHOTHR", "HHRETIRE", "HHSUB")], 1, which.max)])
#add variable indicating whether household receives income from specific sources
df <- df %>%
mutate(business_ind = factor(as.numeric(HHBUS != 0)),
farm_ind = factor(as.numeric(HHFARM != 0)),
fish_ind = factor(as.numeric(HHFISH != 0)),
gard_ind = factor(as.numeric(HHGARD != 0)),
lvst_ind = factor(as.numeric(HHLVST != 0)),
othr_ind = factor(as.numeric(HHOTHR != 0)),
retire_ind = factor(as.numeric(HHRETIRE != 0)),
sub_ind = factor(as.numeric(HHSUB != 0)))
names(df)<- toupper(names(df))
df$T1 = as.factor(df$T1)
names(df_trimmed)<- toupper(names(df_trimmed))
df_trimmed$T1 = as.factor(df_trimmed$T1)
head(df)
## # A tibble: 6 x 36
## HHID WAVE HHINC_CPI HHINCPC_CPI HHBUS HHBUSIMP HHFARM HHFARMIMP HHFISH
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2111… 1989 14175. 4725. 0 0 0 0 0
## 2 2111… 1989 13086. 4362. 0 0 0 0 0
## 3 2111… 1989 15047. 5016. 0 0 0 0 0
## 4 2111… 1989 17020. 5673. 0 0 0 0 0
## 5 2111… 1989 28431. 9477. 0 0 0 0 0
## 6 2111… 1989 11576. 5788. 0 0 0 0 0
## # … with 27 more variables: HHFISHIMP <dbl>, HHGARD <dbl>, HHGARDIMP <dbl>,
## # HHLVST <dbl>, HHLVSTIMP <dbl>, HHNRWAGE <dbl>, HHNRWAGEIMP <dbl>,
## # HHOTHR <dbl>, HHOTHRIMP <dbl>, HHRETIRE <dbl>, HHRETIREIMP <dbl>,
## # HHSUB <dbl>, HHSUBIMP <dbl>, T1 <fct>, URBAN <fct>, THOUSANDS <dbl>,
## # CYEAR <dbl>, THOUSANDS_SHIFTED <dbl>, PRIMARY <chr>, BUSINESS_IND <fct>,
## # FARM_IND <fct>, FISH_IND <fct>, GARD_IND <fct>, LVST_IND <fct>,
## # OTHR_IND <fct>, RETIRE_IND <fct>, SUB_IND <fct>
head(df_trimmed)
## # A tibble: 6 x 28
## HHID WAVE HHINC_CPI HHINCPC_CPI HHBUS HHBUSIMP HHFARM HHFARMIMP HHFISH
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2111… 1989 14175. 4725. 0 0 0 0 0
## 2 2111… 1989 13086. 4362. 0 0 0 0 0
## 3 2111… 1989 15047. 5016. 0 0 0 0 0
## 4 2111… 1989 17020. 5673. 0 0 0 0 0
## 5 2111… 1989 28431. 9477. 0 0 0 0 0
## 6 2111… 1989 11576. 5788. 0 0 0 0 0
## # … with 19 more variables: HHFISHIMP <dbl>, HHGARD <dbl>, HHGARDIMP <dbl>,
## # HHLVST <dbl>, HHLVSTIMP <dbl>, HHNRWAGE <dbl>, HHNRWAGEIMP <dbl>,
## # HHOTHR <dbl>, HHOTHRIMP <dbl>, HHRETIRE <dbl>, HHRETIREIMP <dbl>,
## # HHSUB <dbl>, HHSUBIMP <dbl>, T1 <fct>, URBAN <fct>, THOUSANDS <dbl>,
## # CYEAR <dbl>, THOUSANDS_SHIFTED <dbl>, PRIMARY <chr>
Preliminary Checks
# fit model with all interactions
m1 = lmer(THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY +
CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
+ CYEAR*T1 + URBAN*T1 + T1*PRIMARY
+ (1 | HHID), data=df)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR * URBAN + PRIMARY *
## CYEAR + URBAN * PRIMARY + CYEAR * T1 + URBAN * T1 + T1 *
## PRIMARY + (1 | HHID)
## Data: df
##
## REML criterion at convergence: 367440
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -14.025 -0.284 -0.050 0.163 46.491
##
## Random effects:
## Groups Name Variance Std.Dev.
## HHID (Intercept) 138.9 11.79
## Residual 201.0 14.18
## Number of obs: 43671, groups: HHID, 9628
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -63.47811 6.67646 -9.508
## CYEAR 3.11303 0.25273 12.318
## URBAN1 12.85672 2.26241 5.683
## T121 62.98502 6.71494 9.380
## T123 52.80192 6.78430 7.783
## T131 29.81922 9.49085 3.142
## T132 64.66848 6.71860 9.625
## T137 64.71300 6.71282 9.640
## T141 63.39543 6.71487 9.441
## T142 63.08973 6.71538 9.395
## T143 66.44409 6.70987 9.902
## T145 64.97931 6.70591 9.690
## T152 62.86877 6.70721 9.373
## T155 57.78970 9.16511 6.305
## PRIMARYHHFARM 13.23116 4.91987 2.689
## PRIMARYHHFISH -6.92581 19.11678 -0.362
## PRIMARYHHGARD 11.63014 3.89550 2.986
## PRIMARYHHLVST -0.38807 8.84008 -0.044
## PRIMARYHHOTHR 15.16196 2.18153 6.950
## PRIMARYHHRETIRE -0.73363 2.05914 -0.356
## PRIMARYHHSUB 6.77936 2.12599 3.189
## CYEAR:URBAN1 0.11153 0.02951 3.780
## CYEAR:PRIMARYHHFARM -0.39521 0.03386 -11.671
## CYEAR:PRIMARYHHFISH -0.07912 0.18440 -0.429
## CYEAR:PRIMARYHHGARD -0.15564 0.03380 -4.605
## CYEAR:PRIMARYHHLVST -0.13042 0.07271 -1.794
## CYEAR:PRIMARYHHOTHR -0.13329 0.03705 -3.597
## CYEAR:PRIMARYHHRETIRE 0.14383 0.04066 3.537
## CYEAR:PRIMARYHHSUB -0.18086 0.04160 -4.347
## URBAN1:PRIMARYHHFARM -0.56676 1.44833 -0.391
## URBAN1:PRIMARYHHFISH 4.84726 6.73914 0.719
## URBAN1:PRIMARYHHGARD 0.26312 1.03494 0.254
## URBAN1:PRIMARYHHLVST 0.82906 2.03694 0.407
## URBAN1:PRIMARYHHOTHR 0.26005 0.57997 0.448
## URBAN1:PRIMARYHHRETIRE -0.03134 0.72007 -0.044
## URBAN1:PRIMARYHHSUB 1.22021 0.74191 1.645
## CYEAR:T121 -2.22155 0.25361 -8.760
## CYEAR:T123 -1.88498 0.25607 -7.361
## CYEAR:T131 -0.51588 0.35655 -1.447
## CYEAR:T132 -2.14269 0.25346 -8.454
## CYEAR:T137 -2.39658 0.25314 -9.468
## CYEAR:T141 -2.57461 0.25309 -10.173
## CYEAR:T142 -2.28326 0.25307 -9.022
## CYEAR:T143 -2.52265 0.25321 -9.963
## CYEAR:T145 -2.64231 0.25314 -10.438
## CYEAR:T152 -2.47050 0.25320 -9.757
## CYEAR:T155 -2.34270 0.35351 -6.627
## URBAN1:T121 -13.71257 2.37909 -5.764
## URBAN1:T123 -4.01332 2.51266 -1.597
## URBAN1:T131 -14.29782 2.86328 -4.994
## URBAN1:T132 -13.57151 2.38214 -5.697
## URBAN1:T137 -13.49382 2.35916 -5.720
## URBAN1:T141 -11.25385 2.40654 -4.676
## URBAN1:T142 -13.71377 2.42098 -5.665
## URBAN1:T143 -13.76509 2.37244 -5.802
## URBAN1:T145 -12.02212 2.38655 -5.037
## URBAN1:T152 -8.59692 2.39926 -3.583
## URBAN1:T155 -10.46102 2.61657 -3.998
## T121:PRIMARYHHFARM -10.63811 4.95069 -2.149
## T123:PRIMARYHHFARM -7.97679 4.97398 -1.604
## T131:PRIMARYHHFARM -18.39630 13.01617 -1.413
## T132:PRIMARYHHFARM -12.93698 4.94431 -2.617
## T137:PRIMARYHHFARM -11.05728 4.94173 -2.238
## T141:PRIMARYHHFARM -11.02634 4.92713 -2.238
## T142:PRIMARYHHFARM -9.56899 4.94251 -1.936
## T143:PRIMARYHHFARM -13.82735 4.96648 -2.784
## T145:PRIMARYHHFARM -11.28105 4.93556 -2.286
## T152:PRIMARYHHFARM -10.91695 4.93148 -2.214
## T155:PRIMARYHHFARM -4.84221 5.78368 -0.837
## T121:PRIMARYHHFISH 9.40252 20.58883 0.457
## T132:PRIMARYHHFISH 6.60504 19.24440 0.343
## T137:PRIMARYHHFISH 7.22641 20.28429 0.356
## T141:PRIMARYHHFISH 10.84328 20.88284 0.519
## T142:PRIMARYHHFISH 6.49379 18.78857 0.346
## T143:PRIMARYHHFISH 9.55755 18.99479 0.503
## T145:PRIMARYHHFISH 6.96299 19.16938 0.363
## T152:PRIMARYHHFISH 6.27891 21.49696 0.292
## T121:PRIMARYHHGARD -10.13772 3.93747 -2.575
## T123:PRIMARYHHGARD -9.46574 4.00134 -2.366
## T131:PRIMARYHHGARD -6.33916 5.46589 -1.160
## T132:PRIMARYHHGARD -10.95868 3.92998 -2.788
## T137:PRIMARYHHGARD -9.74915 3.96028 -2.462
## T141:PRIMARYHHGARD -10.59900 3.95453 -2.680
## T142:PRIMARYHHGARD -9.95248 3.89718 -2.554
## T143:PRIMARYHHGARD -12.23537 3.89952 -3.138
## T145:PRIMARYHHGARD -11.25875 3.89856 -2.888
## T152:PRIMARYHHGARD -10.48484 3.89636 -2.691
## T155:PRIMARYHHGARD -11.79882 4.19530 -2.812
## T121:PRIMARYHHLVST 3.02210 8.94752 0.338
## T123:PRIMARYHHLVST 9.92423 9.02273 1.100
## T131:PRIMARYHHLVST -10.78969 12.29382 -0.878
## T132:PRIMARYHHLVST 1.29142 8.93082 0.145
## T137:PRIMARYHHLVST 4.90421 9.04233 0.542
## T141:PRIMARYHHLVST 2.52882 8.88471 0.285
## T142:PRIMARYHHLVST 2.58812 8.94088 0.289
## T143:PRIMARYHHLVST -0.94807 8.80562 -0.108
## T145:PRIMARYHHLVST 0.44477 8.79007 0.051
## T152:PRIMARYHHLVST 1.27716 8.84177 0.144
## T155:PRIMARYHHLVST -2.58677 10.11686 -0.256
## T121:PRIMARYHHOTHR -15.45221 2.22385 -6.948
## T123:PRIMARYHHOTHR -14.11167 2.28652 -6.172
## T131:PRIMARYHHOTHR -9.19932 3.10304 -2.965
## T132:PRIMARYHHOTHR -16.08096 2.18392 -7.363
## T137:PRIMARYHHOTHR -14.91221 2.15861 -6.908
## T141:PRIMARYHHOTHR -14.01096 2.14728 -6.525
## T142:PRIMARYHHOTHR -16.74783 2.16510 -7.735
## T143:PRIMARYHHOTHR -15.85189 2.12287 -7.467
## T145:PRIMARYHHOTHR -14.63859 2.13549 -6.855
## T152:PRIMARYHHOTHR -16.96346 2.12991 -7.964
## T155:PRIMARYHHOTHR -12.20857 2.75253 -4.435
## T121:PRIMARYHHRETIRE 1.53155 1.97532 0.775
## T123:PRIMARYHHRETIRE -3.55621 2.21783 -1.603
## T131:PRIMARYHHRETIRE -6.11997 2.62481 -2.332
## T132:PRIMARYHHRETIRE -2.39910 2.00487 -1.197
## T137:PRIMARYHHRETIRE -0.21861 1.96185 -0.111
## T141:PRIMARYHHRETIRE 0.69591 2.04958 0.340
## T142:PRIMARYHHRETIRE -3.57599 1.98570 -1.801
## T143:PRIMARYHHRETIRE -0.68156 2.02056 -0.337
## T145:PRIMARYHHRETIRE -2.62042 1.97628 -1.326
## T152:PRIMARYHHRETIRE -0.51261 2.01861 -0.254
## T155:PRIMARYHHRETIRE 3.12800 2.54880 1.227
## T121:PRIMARYHHSUB -5.54266 2.15008 -2.578
## T123:PRIMARYHHSUB -8.47326 2.22836 -3.802
## T131:PRIMARYHHSUB -3.23920 3.39946 -0.953
## T132:PRIMARYHHSUB -6.01058 2.16528 -2.776
## T137:PRIMARYHHSUB -5.92944 2.09717 -2.827
## T141:PRIMARYHHSUB -6.66027 2.21639 -3.005
## T142:PRIMARYHHSUB -4.81160 2.14400 -2.244
## T143:PRIMARYHHSUB -7.80352 2.13199 -3.660
## T145:PRIMARYHHSUB -6.81338 2.15087 -3.168
## T152:PRIMARYHHSUB -8.31807 2.16058 -3.850
## T155:PRIMARYHHSUB -4.11441 3.35876 -1.225
##
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
anova(m1)
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## CYEAR 1 1034212 1034212 5145.1393
## URBAN 1 67475 67475 335.6855
## T1 11 90947 8268 41.1323
## PRIMARY 7 30805 4401 21.8931
## CYEAR:URBAN 1 28974 28974 144.1417
## CYEAR:PRIMARY 7 47690 6813 33.8935
## URBAN:PRIMARY 7 3030 433 2.1536
## CYEAR:T1 11 97825 8893 44.2429
## URBAN:T1 11 13514 1229 6.1120
## T1:PRIMARY 74 48842 660 3.2836
qqmath(resid(m1))

plot(m1,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Initial Model Residuals")

# fit model w/ log transform w/ outliers
m2 = lmer(log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY +
CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
+ CYEAR*T1 + URBAN*T1 + T1*PRIMARY
+ (1 | HHID), data=df)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR *
## URBAN + PRIMARY * CYEAR + URBAN * PRIMARY + CYEAR * T1 +
## URBAN * T1 + T1 * PRIMARY + (1 | HHID)
## Data: df
##
## REML criterion at convergence: -107455.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -138.909 -0.270 -0.057 0.136 23.258
##
## Random effects:
## Groups Name Variance Std.Dev.
## HHID (Intercept) 0.0005046 0.02246
## Residual 0.0044727 0.06688
## Number of obs: 43671, groups: HHID, 9628
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.2550586 0.0293460 179.072
## CYEAR 0.0092445 0.0011382 8.122
## URBAN1 0.0425616 0.0083542 5.095
## T121 0.1685402 0.0294411 5.725
## T123 0.1259703 0.0297327 4.237
## T131 0.0713243 0.0416467 1.713
## T132 0.1743841 0.0294447 5.922
## T137 0.1731355 0.0294304 5.883
## T141 0.1685982 0.0294378 5.727
## T142 0.1666256 0.0294324 5.661
## T143 0.1906330 0.0294113 6.482
## T145 0.1755471 0.0293896 5.973
## T152 0.1654509 0.0294083 5.626
## T155 0.1501979 0.0406290 3.697
## PRIMARYHHFARM 0.0413012 0.0206219 2.003
## PRIMARYHHFISH -0.0314548 0.0740300 -0.425
## PRIMARYHHGARD 0.0329332 0.0156022 2.111
## PRIMARYHHLVST -0.0124218 0.0368227 -0.337
## PRIMARYHHOTHR 0.0403738 0.0089815 4.495
## PRIMARYHHRETIRE -0.0067145 0.0081535 -0.824
## PRIMARYHHSUB 0.0200021 0.0087296 2.291
## CYEAR:URBAN1 0.0003701 0.0001258 2.943
## CYEAR:PRIMARYHHFARM -0.0018322 0.0001516 -12.082
## CYEAR:PRIMARYHHFISH -0.0001523 0.0008295 -0.184
## CYEAR:PRIMARYHHGARD -0.0005356 0.0001509 -3.548
## CYEAR:PRIMARYHHLVST -0.0002802 0.0003285 -0.853
## CYEAR:PRIMARYHHOTHR -0.0004947 0.0001648 -3.003
## CYEAR:PRIMARYHHRETIRE 0.0008158 0.0001754 4.651
## CYEAR:PRIMARYHHSUB -0.0004172 0.0001813 -2.301
## URBAN1:PRIMARYHHFARM -0.0067629 0.0063100 -1.072
## URBAN1:PRIMARYHHFISH 0.0192441 0.0302786 0.636
## URBAN1:PRIMARYHHGARD -0.0011049 0.0044282 -0.250
## URBAN1:PRIMARYHHLVST 0.0027009 0.0091283 0.296
## URBAN1:PRIMARYHHOTHR 0.0019336 0.0025132 0.769
## URBAN1:PRIMARYHHRETIRE 0.0014745 0.0029157 0.506
## URBAN1:PRIMARYHHSUB 0.0046811 0.0031595 1.482
## CYEAR:T121 -0.0057717 0.0011414 -5.057
## CYEAR:T123 -0.0043374 0.0011527 -3.763
## CYEAR:T131 -0.0008384 0.0016031 -0.523
## CYEAR:T132 -0.0054180 0.0011409 -4.749
## CYEAR:T137 -0.0063704 0.0011397 -5.589
## CYEAR:T141 -0.0070055 0.0011397 -6.147
## CYEAR:T142 -0.0059912 0.0011400 -5.255
## CYEAR:T143 -0.0074354 0.0011403 -6.521
## CYEAR:T145 -0.0074547 0.0011398 -6.540
## CYEAR:T152 -0.0065829 0.0011401 -5.774
## CYEAR:T155 -0.0063080 0.0015979 -3.948
## URBAN1:T121 -0.0473368 0.0083780 -5.650
## URBAN1:T123 -0.0128469 0.0090252 -1.423
## URBAN1:T131 -0.0389441 0.0101721 -3.829
## URBAN1:T132 -0.0468964 0.0083897 -5.590
## URBAN1:T137 -0.0468388 0.0082888 -5.651
## URBAN1:T141 -0.0404064 0.0084899 -4.759
## URBAN1:T142 -0.0471390 0.0084249 -5.595
## URBAN1:T143 -0.0487929 0.0082902 -5.886
## URBAN1:T145 -0.0460297 0.0084264 -5.463
## URBAN1:T152 -0.0289733 0.0084662 -3.422
## URBAN1:T155 -0.0325769 0.0093852 -3.471
## T121:PRIMARYHHFARM -0.0341761 0.0207243 -1.649
## T123:PRIMARYHHFARM -0.0152346 0.0208186 -0.732
## T131:PRIMARYHHFARM -0.0328921 0.0542515 -0.606
## T132:PRIMARYHHFARM -0.0411895 0.0207143 -1.988
## T137:PRIMARYHHFARM -0.0350057 0.0206902 -1.692
## T141:PRIMARYHHFARM -0.0323764 0.0206371 -1.569
## T142:PRIMARYHHFARM -0.0296306 0.0206965 -1.432
## T143:PRIMARYHHFARM -0.0725540 0.0208218 -3.485
## T145:PRIMARYHHFARM -0.0323826 0.0206788 -1.566
## T152:PRIMARYHHFARM -0.0325656 0.0206666 -1.576
## T155:PRIMARYHHFARM -0.0048046 0.0239085 -0.201
## T121:PRIMARYHHFISH 0.0288273 0.0820042 0.352
## T132:PRIMARYHHFISH 0.0357645 0.0747390 0.479
## T137:PRIMARYHHFISH 0.0281831 0.0798660 0.353
## T141:PRIMARYHHFISH 0.0442552 0.0827339 0.535
## T142:PRIMARYHHFISH 0.0291210 0.0722135 0.403
## T143:PRIMARYHHFISH 0.0326858 0.0731650 0.447
## T145:PRIMARYHHFISH 0.0351151 0.0743432 0.472
## T152:PRIMARYHHFISH 0.0303029 0.0869223 0.349
## T121:PRIMARYHHGARD -0.0300917 0.0157505 -1.911
## T123:PRIMARYHHGARD -0.0244437 0.0160580 -1.522
## T131:PRIMARYHHGARD -0.0328620 0.0217274 -1.512
## T132:PRIMARYHHGARD -0.0333316 0.0157589 -2.115
## T137:PRIMARYHHGARD -0.0289047 0.0158825 -1.820
## T141:PRIMARYHHGARD -0.0306987 0.0158854 -1.933
## T142:PRIMARYHHGARD -0.0293501 0.0155936 -1.882
## T143:PRIMARYHHGARD -0.0421373 0.0156022 -2.701
## T145:PRIMARYHHGARD -0.0323557 0.0155970 -2.074
## T152:PRIMARYHHGARD -0.0309585 0.0156039 -1.984
## T155:PRIMARYHHGARD -0.0364870 0.0166526 -2.191
## T121:PRIMARYHHLVST 0.0172377 0.0373374 0.462
## T123:PRIMARYHHLVST 0.0448901 0.0376230 1.193
## T131:PRIMARYHHLVST -0.0354855 0.0509504 -0.696
## T132:PRIMARYHHLVST 0.0102568 0.0372649 0.275
## T137:PRIMARYHHLVST 0.0244022 0.0378063 0.645
## T141:PRIMARYHHLVST 0.0178229 0.0370488 0.481
## T142:PRIMARYHHLVST 0.0215238 0.0373147 0.577
## T143:PRIMARYHHLVST -0.0044711 0.0366397 -0.122
## T145:PRIMARYHHLVST 0.0114797 0.0365627 0.314
## T152:PRIMARYHHLVST 0.0133457 0.0368259 0.362
## T155:PRIMARYHHLVST -0.0077738 0.0415474 -0.187
## T121:PRIMARYHHOTHR -0.0449265 0.0091304 -4.921
## T123:PRIMARYHHOTHR -0.0352599 0.0093919 -3.754
## T131:PRIMARYHHOTHR -0.0250123 0.0125662 -1.990
## T132:PRIMARYHHOTHR -0.0427954 0.0089565 -4.778
## T137:PRIMARYHHOTHR -0.0401672 0.0088472 -4.540
## T141:PRIMARYHHOTHR -0.0358910 0.0087883 -4.084
## T142:PRIMARYHHOTHR -0.0478567 0.0089024 -5.376
## T143:PRIMARYHHOTHR -0.0442005 0.0086756 -5.095
## T145:PRIMARYHHOTHR -0.0368456 0.0087355 -4.218
## T152:PRIMARYHHOTHR -0.0489143 0.0087165 -5.612
## T155:PRIMARYHHOTHR -0.0273605 0.0111151 -2.462
## T121:PRIMARYHHRETIRE 0.0085013 0.0076208 1.116
## T123:PRIMARYHHRETIRE -0.0096738 0.0087065 -1.111
## T131:PRIMARYHHRETIRE -0.0226773 0.0101822 -2.227
## T132:PRIMARYHHRETIRE -0.0059007 0.0077780 -0.759
## T137:PRIMARYHHRETIRE 0.0035008 0.0076273 0.459
## T141:PRIMARYHHRETIRE 0.0068253 0.0079813 0.855
## T142:PRIMARYHHRETIRE -0.0113179 0.0077892 -1.453
## T143:PRIMARYHHRETIRE 0.0031822 0.0078853 0.404
## T145:PRIMARYHHRETIRE -0.0047902 0.0077104 -0.621
## T152:PRIMARYHHRETIRE 0.0002537 0.0079037 0.032
## T155:PRIMARYHHRETIRE 0.0113551 0.0097214 1.168
## T121:PRIMARYHHSUB -0.0112021 0.0087526 -1.280
## T123:PRIMARYHHSUB -0.0229826 0.0091654 -2.508
## T131:PRIMARYHHSUB -0.0059112 0.0137822 -0.429
## T132:PRIMARYHHSUB -0.0148314 0.0088362 -1.678
## T137:PRIMARYHHSUB -0.0152182 0.0085786 -1.774
## T141:PRIMARYHHSUB -0.0150387 0.0089959 -1.672
## T142:PRIMARYHHSUB -0.0097392 0.0088196 -1.104
## T143:PRIMARYHHSUB -0.0238762 0.0087210 -2.738
## T145:PRIMARYHHSUB -0.0182125 0.0088387 -2.061
## T152:PRIMARYHHSUB -0.0216936 0.0088343 -2.456
## T155:PRIMARYHHSUB -0.0130575 0.0136407 -0.957
##
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
qqmath(resid(m2))

plot(m2,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Logged Model Residuals")

# fit model w/ log transform w/o outliers
m3 = lmer(log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY +
CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
+ CYEAR*T1 + URBAN*T1 + T1*PRIMARY
+ (1 | HHID), data=df_trimmed)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR *
## URBAN + PRIMARY * CYEAR + URBAN * PRIMARY + CYEAR * T1 +
## URBAN * T1 + T1 * PRIMARY + (1 | HHID)
## Data: df_trimmed
##
## REML criterion at convergence: 82721.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.5981 -0.5149 0.0153 0.5331 6.2093
##
## Random effects:
## Groups Name Variance Std.Dev.
## HHID (Intercept) 0.1116 0.3341
## Residual 0.3194 0.5651
## Number of obs: 43583, groups: HHID, 9623
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.5618304 0.2581509 6.050
## CYEAR 0.0430227 0.0098913 4.350
## URBAN1 0.3160395 0.0798599 3.957
## T121 0.0146482 0.2592921 0.056
## T123 -0.7170148 0.2619674 -2.737
## T131 0.2014195 0.3661220 0.550
## T132 0.1329570 0.2593712 0.513
## T137 -0.0269323 0.2592029 -0.104
## T141 -0.1838794 0.2592722 -0.709
## T142 -0.1753367 0.2592573 -0.676
## T143 0.0678627 0.2590604 0.262
## T145 -0.0133963 0.2588767 -0.052
## T152 -0.1956473 0.2589818 -0.755
## T155 -0.0024103 0.3552454 -0.007
## PRIMARYHHFARM -0.1384962 0.1855914 -0.746
## PRIMARYHHFISH -0.2240370 0.6854126 -0.327
## PRIMARYHHGARD -0.1492003 0.1436812 -1.038
## PRIMARYHHLVST -0.8132208 0.3327167 -2.444
## PRIMARYHHOTHR 0.1795430 0.0821877 2.185
## PRIMARYHHRETIRE -0.1162181 0.0755399 -1.539
## PRIMARYHHSUB 0.0042437 0.0795875 0.053
## CYEAR:URBAN1 0.0035260 0.0011303 3.120
## CYEAR:PRIMARYHHFARM -0.0063348 0.0013276 -4.772
## CYEAR:PRIMARYHHFISH 0.0033799 0.0072426 0.467
## CYEAR:PRIMARYHHGARD 0.0089151 0.0013218 6.745
## CYEAR:PRIMARYHHLVST 0.0095225 0.0028547 3.336
## CYEAR:PRIMARYHHOTHR -0.0053007 0.0014460 -3.666
## CYEAR:PRIMARYHHRETIRE 0.0166843 0.0015656 10.657
## CYEAR:PRIMARYHHSUB 0.0004642 0.0016082 0.289
## URBAN1:PRIMARYHHFARM -0.0394113 0.0567223 -0.695
## URBAN1:PRIMARYHHFISH 0.1679606 0.2641293 0.636
## URBAN1:PRIMARYHHGARD -0.0259260 0.0397746 -0.652
## URBAN1:PRIMARYHHLVST 0.0652337 0.0798325 0.817
## URBAN1:PRIMARYHHOTHR -0.0169596 0.0224079 -0.757
## URBAN1:PRIMARYHHRETIRE 0.0504328 0.0270507 1.864
## URBAN1:PRIMARYHHSUB 0.1086176 0.0284339 3.820
## CYEAR:T121 0.0069811 0.0099227 0.704
## CYEAR:T123 0.0262604 0.0100198 2.621
## CYEAR:T131 0.0095234 0.0139345 0.683
## CYEAR:T132 0.0079582 0.0099176 0.802
## CYEAR:T137 -0.0019718 0.0099060 -0.199
## CYEAR:T141 -0.0109794 0.0099049 -1.108
## CYEAR:T142 0.0027835 0.0099058 0.281
## CYEAR:T143 -0.0116312 0.0099099 -1.174
## CYEAR:T145 -0.0160639 0.0099062 -1.622
## CYEAR:T152 -0.0036789 0.0099088 -0.371
## CYEAR:T155 -0.0228148 0.0138438 -1.648
## URBAN1:T121 -0.3438524 0.0821327 -4.187
## URBAN1:T123 0.2240949 0.0876493 2.557
## URBAN1:T131 -0.2653309 0.0991733 -2.675
## URBAN1:T132 -0.3924686 0.0822641 -4.771
## URBAN1:T137 -0.3556785 0.0813637 -4.371
## URBAN1:T141 -0.1330007 0.0832333 -1.598
## URBAN1:T142 -0.2787304 0.0831610 -3.352
## URBAN1:T143 -0.2763176 0.0816202 -3.385
## URBAN1:T145 -0.3506235 0.0825019 -4.250
## URBAN1:T152 -0.0183336 0.0829218 -0.221
## URBAN1:T155 -0.1033889 0.0909590 -1.137
## T121:PRIMARYHHFARM -0.1926021 0.1867068 -1.032
## T123:PRIMARYHHFARM 0.2249867 0.1876227 1.199
## T131:PRIMARYHHFARM -0.1688836 0.4902134 -0.345
## T132:PRIMARYHHFARM -0.1689360 0.1865117 -0.906
## T137:PRIMARYHHFARM -0.0277265 0.1863729 -0.149
## T141:PRIMARYHHFARM -0.0762178 0.1858284 -0.410
## T142:PRIMARYHHFARM -0.0282780 0.1864352 -0.152
## T143:PRIMARYHHFARM -0.1295494 0.1874575 -0.691
## T145:PRIMARYHHFARM -0.1111607 0.1861745 -0.597
## T152:PRIMARYHHFARM -0.1011704 0.1860285 -0.544
## T155:PRIMARYHHFARM 0.5160301 0.2172880 2.375
## T121:PRIMARYHHFISH 0.3568243 0.7492528 0.476
## T132:PRIMARYHHFISH 0.3194231 0.6909858 0.462
## T137:PRIMARYHHFISH 0.2834861 0.7345091 0.386
## T141:PRIMARYHHFISH 0.5274449 0.7599429 0.694
## T142:PRIMARYHHFISH 0.2415545 0.6709912 0.360
## T143:PRIMARYHHFISH 0.4538453 0.6794552 0.668
## T145:PRIMARYHHFISH 0.3367605 0.6878997 0.490
## T152:PRIMARYHHFISH 0.1305119 0.7885433 0.166
## T121:PRIMARYHHGARD -0.1621139 0.1452369 -1.116
## T123:PRIMARYHHGARD 0.0771567 0.1479006 0.522
## T131:PRIMARYHHGARD -0.2279779 0.2025880 -1.125
## T132:PRIMARYHHGARD -0.0845802 0.1450825 -0.583
## T137:PRIMARYHHGARD 0.1198030 0.1462495 0.819
## T141:PRIMARYHHGARD 0.0125943 0.1460937 0.086
## T142:PRIMARYHHGARD 0.0062095 0.1437221 0.043
## T143:PRIMARYHHGARD -0.0449206 0.1437907 -0.312
## T145:PRIMARYHHGARD -0.0809027 0.1437526 -0.563
## T152:PRIMARYHHGARD -0.0236003 0.1437133 -0.164
## T155:PRIMARYHHGARD 0.0757281 0.1540392 0.492
## T121:PRIMARYHHLVST 0.4768837 0.3370989 1.415
## T123:PRIMARYHHLVST 1.1860814 0.3400202 3.488
## T131:PRIMARYHHLVST 0.1853870 0.4615335 0.402
## T132:PRIMARYHHLVST 0.5719443 0.3365163 1.700
## T137:PRIMARYHHLVST 0.8743785 0.3409695 2.564
## T141:PRIMARYHHLVST 0.7971393 0.3345954 2.382
## T142:PRIMARYHHLVST 0.7258149 0.3368771 2.155
## T143:PRIMARYHHLVST 0.3801149 0.3312613 1.147
## T145:PRIMARYHHLVST 0.5781784 0.3306270 1.749
## T152:PRIMARYHHLVST 0.5408523 0.3327848 1.625
## T155:PRIMARYHHLVST 0.4866854 0.3779092 1.288
## T121:PRIMARYHHOTHR -0.3280062 0.0837051 -3.919
## T123:PRIMARYHHOTHR -0.1298177 0.0861693 -1.507
## T131:PRIMARYHHOTHR -0.1703986 0.1156399 -1.474
## T132:PRIMARYHHOTHR -0.1887592 0.0821778 -2.297
## T137:PRIMARYHHOTHR -0.2041672 0.0812045 -2.514
## T141:PRIMARYHHOTHR -0.2007368 0.0807209 -2.487
## T142:PRIMARYHHOTHR -0.3418783 0.0815589 -4.192
## T143:PRIMARYHHOTHR -0.2489674 0.0797648 -3.121
## T145:PRIMARYHHOTHR -0.1821113 0.0802630 -2.269
## T152:PRIMARYHHOTHR -0.3405880 0.0800679 -4.254
## T155:PRIMARYHHOTHR 0.0591478 0.1023612 0.578
## T121:PRIMARYHHRETIRE 0.0861324 0.0716166 1.203
## T123:PRIMARYHHRETIRE -0.0861180 0.0812796 -1.060
## T131:PRIMARYHHRETIRE -0.2966162 0.0952242 -3.115
## T132:PRIMARYHHRETIRE -0.0235444 0.0729320 -0.323
## T137:PRIMARYHHRETIRE 0.1376026 0.0713851 1.928
## T141:PRIMARYHHRETIRE 0.1580941 0.0747122 2.116
## T142:PRIMARYHHRETIRE -0.0721954 0.0725979 -0.994
## T143:PRIMARYHHRETIRE 0.1231290 0.0737136 1.670
## T145:PRIMARYHHRETIRE 0.0111629 0.0720137 0.155
## T152:PRIMARYHHRETIRE -0.0228738 0.0737762 -0.310
## T155:PRIMARYHHRETIRE 0.3507000 0.0915713 3.830
## T121:PRIMARYHHSUB -0.0354432 0.0802377 -0.442
## T123:PRIMARYHHSUB -0.1092376 0.0836217 -1.306
## T131:PRIMARYHHSUB -0.0610588 0.1266517 -0.482
## T132:PRIMARYHHSUB -0.0372753 0.0809144 -0.461
## T137:PRIMARYHHSUB -0.0350315 0.0784096 -0.447
## T141:PRIMARYHHSUB -0.0803380 0.0826573 -0.972
## T142:PRIMARYHHSUB 0.0334957 0.0804020 0.417
## T143:PRIMARYHHSUB -0.1176882 0.0797352 -1.476
## T145:PRIMARYHHSUB -0.1458065 0.0805663 -1.810
## T152:PRIMARYHHSUB -0.1596282 0.0808367 -1.975
## T155:PRIMARYHHSUB 0.0341838 0.1246815 0.274
##
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
qqmath(resid(m3))

plot(m3,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Residuals of log model without outliers")

Initial Model
colnames(df)
## [1] "HHID" "WAVE" "HHINC_CPI"
## [4] "HHINCPC_CPI" "HHBUS" "HHBUSIMP"
## [7] "HHFARM" "HHFARMIMP" "HHFISH"
## [10] "HHFISHIMP" "HHGARD" "HHGARDIMP"
## [13] "HHLVST" "HHLVSTIMP" "HHNRWAGE"
## [16] "HHNRWAGEIMP" "HHOTHR" "HHOTHRIMP"
## [19] "HHRETIRE" "HHRETIREIMP" "HHSUB"
## [22] "HHSUBIMP" "T1" "URBAN"
## [25] "THOUSANDS" "CYEAR" "THOUSANDS_SHIFTED"
## [28] "PRIMARY" "BUSINESS_IND" "FARM_IND"
## [31] "FISH_IND" "GARD_IND" "LVST_IND"
## [34] "OTHR_IND" "RETIRE_IND" "SUB_IND"
# fit model with all interactions
m1 = lmer(THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY +
CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
+ CYEAR*T1 + URBAN*T1 + T1*PRIMARY
+ (1 | HHID), data=df)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR * URBAN + PRIMARY *
## CYEAR + URBAN * PRIMARY + CYEAR * T1 + URBAN * T1 + T1 *
## PRIMARY + (1 | HHID)
## Data: df
##
## REML criterion at convergence: 367440
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -14.025 -0.284 -0.050 0.163 46.491
##
## Random effects:
## Groups Name Variance Std.Dev.
## HHID (Intercept) 138.9 11.79
## Residual 201.0 14.18
## Number of obs: 43671, groups: HHID, 9628
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -63.47811 6.67646 -9.508
## CYEAR 3.11303 0.25273 12.318
## URBAN1 12.85672 2.26241 5.683
## T121 62.98502 6.71494 9.380
## T123 52.80192 6.78430 7.783
## T131 29.81922 9.49085 3.142
## T132 64.66848 6.71860 9.625
## T137 64.71300 6.71282 9.640
## T141 63.39543 6.71487 9.441
## T142 63.08973 6.71538 9.395
## T143 66.44409 6.70987 9.902
## T145 64.97931 6.70591 9.690
## T152 62.86877 6.70721 9.373
## T155 57.78970 9.16511 6.305
## PRIMARYHHFARM 13.23116 4.91987 2.689
## PRIMARYHHFISH -6.92581 19.11678 -0.362
## PRIMARYHHGARD 11.63014 3.89550 2.986
## PRIMARYHHLVST -0.38807 8.84008 -0.044
## PRIMARYHHOTHR 15.16196 2.18153 6.950
## PRIMARYHHRETIRE -0.73363 2.05914 -0.356
## PRIMARYHHSUB 6.77936 2.12599 3.189
## CYEAR:URBAN1 0.11153 0.02951 3.780
## CYEAR:PRIMARYHHFARM -0.39521 0.03386 -11.671
## CYEAR:PRIMARYHHFISH -0.07912 0.18440 -0.429
## CYEAR:PRIMARYHHGARD -0.15564 0.03380 -4.605
## CYEAR:PRIMARYHHLVST -0.13042 0.07271 -1.794
## CYEAR:PRIMARYHHOTHR -0.13329 0.03705 -3.597
## CYEAR:PRIMARYHHRETIRE 0.14383 0.04066 3.537
## CYEAR:PRIMARYHHSUB -0.18086 0.04160 -4.347
## URBAN1:PRIMARYHHFARM -0.56676 1.44833 -0.391
## URBAN1:PRIMARYHHFISH 4.84726 6.73914 0.719
## URBAN1:PRIMARYHHGARD 0.26312 1.03494 0.254
## URBAN1:PRIMARYHHLVST 0.82906 2.03694 0.407
## URBAN1:PRIMARYHHOTHR 0.26005 0.57997 0.448
## URBAN1:PRIMARYHHRETIRE -0.03134 0.72007 -0.044
## URBAN1:PRIMARYHHSUB 1.22021 0.74191 1.645
## CYEAR:T121 -2.22155 0.25361 -8.760
## CYEAR:T123 -1.88498 0.25607 -7.361
## CYEAR:T131 -0.51588 0.35655 -1.447
## CYEAR:T132 -2.14269 0.25346 -8.454
## CYEAR:T137 -2.39658 0.25314 -9.468
## CYEAR:T141 -2.57461 0.25309 -10.173
## CYEAR:T142 -2.28326 0.25307 -9.022
## CYEAR:T143 -2.52265 0.25321 -9.963
## CYEAR:T145 -2.64231 0.25314 -10.438
## CYEAR:T152 -2.47050 0.25320 -9.757
## CYEAR:T155 -2.34270 0.35351 -6.627
## URBAN1:T121 -13.71257 2.37909 -5.764
## URBAN1:T123 -4.01332 2.51266 -1.597
## URBAN1:T131 -14.29782 2.86328 -4.994
## URBAN1:T132 -13.57151 2.38214 -5.697
## URBAN1:T137 -13.49382 2.35916 -5.720
## URBAN1:T141 -11.25385 2.40654 -4.676
## URBAN1:T142 -13.71377 2.42098 -5.665
## URBAN1:T143 -13.76509 2.37244 -5.802
## URBAN1:T145 -12.02212 2.38655 -5.037
## URBAN1:T152 -8.59692 2.39926 -3.583
## URBAN1:T155 -10.46102 2.61657 -3.998
## T121:PRIMARYHHFARM -10.63811 4.95069 -2.149
## T123:PRIMARYHHFARM -7.97679 4.97398 -1.604
## T131:PRIMARYHHFARM -18.39630 13.01617 -1.413
## T132:PRIMARYHHFARM -12.93698 4.94431 -2.617
## T137:PRIMARYHHFARM -11.05728 4.94173 -2.238
## T141:PRIMARYHHFARM -11.02634 4.92713 -2.238
## T142:PRIMARYHHFARM -9.56899 4.94251 -1.936
## T143:PRIMARYHHFARM -13.82735 4.96648 -2.784
## T145:PRIMARYHHFARM -11.28105 4.93556 -2.286
## T152:PRIMARYHHFARM -10.91695 4.93148 -2.214
## T155:PRIMARYHHFARM -4.84221 5.78368 -0.837
## T121:PRIMARYHHFISH 9.40252 20.58883 0.457
## T132:PRIMARYHHFISH 6.60504 19.24440 0.343
## T137:PRIMARYHHFISH 7.22641 20.28429 0.356
## T141:PRIMARYHHFISH 10.84328 20.88284 0.519
## T142:PRIMARYHHFISH 6.49379 18.78857 0.346
## T143:PRIMARYHHFISH 9.55755 18.99479 0.503
## T145:PRIMARYHHFISH 6.96299 19.16938 0.363
## T152:PRIMARYHHFISH 6.27891 21.49696 0.292
## T121:PRIMARYHHGARD -10.13772 3.93747 -2.575
## T123:PRIMARYHHGARD -9.46574 4.00134 -2.366
## T131:PRIMARYHHGARD -6.33916 5.46589 -1.160
## T132:PRIMARYHHGARD -10.95868 3.92998 -2.788
## T137:PRIMARYHHGARD -9.74915 3.96028 -2.462
## T141:PRIMARYHHGARD -10.59900 3.95453 -2.680
## T142:PRIMARYHHGARD -9.95248 3.89718 -2.554
## T143:PRIMARYHHGARD -12.23537 3.89952 -3.138
## T145:PRIMARYHHGARD -11.25875 3.89856 -2.888
## T152:PRIMARYHHGARD -10.48484 3.89636 -2.691
## T155:PRIMARYHHGARD -11.79882 4.19530 -2.812
## T121:PRIMARYHHLVST 3.02210 8.94752 0.338
## T123:PRIMARYHHLVST 9.92423 9.02273 1.100
## T131:PRIMARYHHLVST -10.78969 12.29382 -0.878
## T132:PRIMARYHHLVST 1.29142 8.93082 0.145
## T137:PRIMARYHHLVST 4.90421 9.04233 0.542
## T141:PRIMARYHHLVST 2.52882 8.88471 0.285
## T142:PRIMARYHHLVST 2.58812 8.94088 0.289
## T143:PRIMARYHHLVST -0.94807 8.80562 -0.108
## T145:PRIMARYHHLVST 0.44477 8.79007 0.051
## T152:PRIMARYHHLVST 1.27716 8.84177 0.144
## T155:PRIMARYHHLVST -2.58677 10.11686 -0.256
## T121:PRIMARYHHOTHR -15.45221 2.22385 -6.948
## T123:PRIMARYHHOTHR -14.11167 2.28652 -6.172
## T131:PRIMARYHHOTHR -9.19932 3.10304 -2.965
## T132:PRIMARYHHOTHR -16.08096 2.18392 -7.363
## T137:PRIMARYHHOTHR -14.91221 2.15861 -6.908
## T141:PRIMARYHHOTHR -14.01096 2.14728 -6.525
## T142:PRIMARYHHOTHR -16.74783 2.16510 -7.735
## T143:PRIMARYHHOTHR -15.85189 2.12287 -7.467
## T145:PRIMARYHHOTHR -14.63859 2.13549 -6.855
## T152:PRIMARYHHOTHR -16.96346 2.12991 -7.964
## T155:PRIMARYHHOTHR -12.20857 2.75253 -4.435
## T121:PRIMARYHHRETIRE 1.53155 1.97532 0.775
## T123:PRIMARYHHRETIRE -3.55621 2.21783 -1.603
## T131:PRIMARYHHRETIRE -6.11997 2.62481 -2.332
## T132:PRIMARYHHRETIRE -2.39910 2.00487 -1.197
## T137:PRIMARYHHRETIRE -0.21861 1.96185 -0.111
## T141:PRIMARYHHRETIRE 0.69591 2.04958 0.340
## T142:PRIMARYHHRETIRE -3.57599 1.98570 -1.801
## T143:PRIMARYHHRETIRE -0.68156 2.02056 -0.337
## T145:PRIMARYHHRETIRE -2.62042 1.97628 -1.326
## T152:PRIMARYHHRETIRE -0.51261 2.01861 -0.254
## T155:PRIMARYHHRETIRE 3.12800 2.54880 1.227
## T121:PRIMARYHHSUB -5.54266 2.15008 -2.578
## T123:PRIMARYHHSUB -8.47326 2.22836 -3.802
## T131:PRIMARYHHSUB -3.23920 3.39946 -0.953
## T132:PRIMARYHHSUB -6.01058 2.16528 -2.776
## T137:PRIMARYHHSUB -5.92944 2.09717 -2.827
## T141:PRIMARYHHSUB -6.66027 2.21639 -3.005
## T142:PRIMARYHHSUB -4.81160 2.14400 -2.244
## T143:PRIMARYHHSUB -7.80352 2.13199 -3.660
## T145:PRIMARYHHSUB -6.81338 2.15087 -3.168
## T152:PRIMARYHHSUB -8.31807 2.16058 -3.850
## T155:PRIMARYHHSUB -4.11441 3.35876 -1.225
##
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
anova(m1)
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## CYEAR 1 1034212 1034212 5145.1393
## URBAN 1 67475 67475 335.6855
## T1 11 90947 8268 41.1323
## PRIMARY 7 30805 4401 21.8931
## CYEAR:URBAN 1 28974 28974 144.1417
## CYEAR:PRIMARY 7 47690 6813 33.8935
## URBAN:PRIMARY 7 3030 433 2.1536
## CYEAR:T1 11 97825 8893 44.2429
## URBAN:T1 11 13514 1229 6.1120
## T1:PRIMARY 74 48842 660 3.2836
qqmath(resid(m1))

plot(m1,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Initial Model Residuals")

We note that the residual plot looks bad and has a fanned shape from left to right. We try a log transform of the response variable. Note that log cannot take on negative values, so we shift all values in the dataset by the minimum value in the dataset and add 0.01. This is the denoted as the variable THOUSANDS_SHIFTED, which we computed above.
# fit model w/ log transform w/ outliers
m2 = lmer(log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY +
CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
+ CYEAR*T1 + URBAN*T1 + T1*PRIMARY
+ (1 | HHID), data=df)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR *
## URBAN + PRIMARY * CYEAR + URBAN * PRIMARY + CYEAR * T1 +
## URBAN * T1 + T1 * PRIMARY + (1 | HHID)
## Data: df
##
## REML criterion at convergence: -107455.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -138.909 -0.270 -0.057 0.136 23.258
##
## Random effects:
## Groups Name Variance Std.Dev.
## HHID (Intercept) 0.0005046 0.02246
## Residual 0.0044727 0.06688
## Number of obs: 43671, groups: HHID, 9628
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.2550586 0.0293460 179.072
## CYEAR 0.0092445 0.0011382 8.122
## URBAN1 0.0425616 0.0083542 5.095
## T121 0.1685402 0.0294411 5.725
## T123 0.1259703 0.0297327 4.237
## T131 0.0713243 0.0416467 1.713
## T132 0.1743841 0.0294447 5.922
## T137 0.1731355 0.0294304 5.883
## T141 0.1685982 0.0294378 5.727
## T142 0.1666256 0.0294324 5.661
## T143 0.1906330 0.0294113 6.482
## T145 0.1755471 0.0293896 5.973
## T152 0.1654509 0.0294083 5.626
## T155 0.1501979 0.0406290 3.697
## PRIMARYHHFARM 0.0413012 0.0206219 2.003
## PRIMARYHHFISH -0.0314548 0.0740300 -0.425
## PRIMARYHHGARD 0.0329332 0.0156022 2.111
## PRIMARYHHLVST -0.0124218 0.0368227 -0.337
## PRIMARYHHOTHR 0.0403738 0.0089815 4.495
## PRIMARYHHRETIRE -0.0067145 0.0081535 -0.824
## PRIMARYHHSUB 0.0200021 0.0087296 2.291
## CYEAR:URBAN1 0.0003701 0.0001258 2.943
## CYEAR:PRIMARYHHFARM -0.0018322 0.0001516 -12.082
## CYEAR:PRIMARYHHFISH -0.0001523 0.0008295 -0.184
## CYEAR:PRIMARYHHGARD -0.0005356 0.0001509 -3.548
## CYEAR:PRIMARYHHLVST -0.0002802 0.0003285 -0.853
## CYEAR:PRIMARYHHOTHR -0.0004947 0.0001648 -3.003
## CYEAR:PRIMARYHHRETIRE 0.0008158 0.0001754 4.651
## CYEAR:PRIMARYHHSUB -0.0004172 0.0001813 -2.301
## URBAN1:PRIMARYHHFARM -0.0067629 0.0063100 -1.072
## URBAN1:PRIMARYHHFISH 0.0192441 0.0302786 0.636
## URBAN1:PRIMARYHHGARD -0.0011049 0.0044282 -0.250
## URBAN1:PRIMARYHHLVST 0.0027009 0.0091283 0.296
## URBAN1:PRIMARYHHOTHR 0.0019336 0.0025132 0.769
## URBAN1:PRIMARYHHRETIRE 0.0014745 0.0029157 0.506
## URBAN1:PRIMARYHHSUB 0.0046811 0.0031595 1.482
## CYEAR:T121 -0.0057717 0.0011414 -5.057
## CYEAR:T123 -0.0043374 0.0011527 -3.763
## CYEAR:T131 -0.0008384 0.0016031 -0.523
## CYEAR:T132 -0.0054180 0.0011409 -4.749
## CYEAR:T137 -0.0063704 0.0011397 -5.589
## CYEAR:T141 -0.0070055 0.0011397 -6.147
## CYEAR:T142 -0.0059912 0.0011400 -5.255
## CYEAR:T143 -0.0074354 0.0011403 -6.521
## CYEAR:T145 -0.0074547 0.0011398 -6.540
## CYEAR:T152 -0.0065829 0.0011401 -5.774
## CYEAR:T155 -0.0063080 0.0015979 -3.948
## URBAN1:T121 -0.0473368 0.0083780 -5.650
## URBAN1:T123 -0.0128469 0.0090252 -1.423
## URBAN1:T131 -0.0389441 0.0101721 -3.829
## URBAN1:T132 -0.0468964 0.0083897 -5.590
## URBAN1:T137 -0.0468388 0.0082888 -5.651
## URBAN1:T141 -0.0404064 0.0084899 -4.759
## URBAN1:T142 -0.0471390 0.0084249 -5.595
## URBAN1:T143 -0.0487929 0.0082902 -5.886
## URBAN1:T145 -0.0460297 0.0084264 -5.463
## URBAN1:T152 -0.0289733 0.0084662 -3.422
## URBAN1:T155 -0.0325769 0.0093852 -3.471
## T121:PRIMARYHHFARM -0.0341761 0.0207243 -1.649
## T123:PRIMARYHHFARM -0.0152346 0.0208186 -0.732
## T131:PRIMARYHHFARM -0.0328921 0.0542515 -0.606
## T132:PRIMARYHHFARM -0.0411895 0.0207143 -1.988
## T137:PRIMARYHHFARM -0.0350057 0.0206902 -1.692
## T141:PRIMARYHHFARM -0.0323764 0.0206371 -1.569
## T142:PRIMARYHHFARM -0.0296306 0.0206965 -1.432
## T143:PRIMARYHHFARM -0.0725540 0.0208218 -3.485
## T145:PRIMARYHHFARM -0.0323826 0.0206788 -1.566
## T152:PRIMARYHHFARM -0.0325656 0.0206666 -1.576
## T155:PRIMARYHHFARM -0.0048046 0.0239085 -0.201
## T121:PRIMARYHHFISH 0.0288273 0.0820042 0.352
## T132:PRIMARYHHFISH 0.0357645 0.0747390 0.479
## T137:PRIMARYHHFISH 0.0281831 0.0798660 0.353
## T141:PRIMARYHHFISH 0.0442552 0.0827339 0.535
## T142:PRIMARYHHFISH 0.0291210 0.0722135 0.403
## T143:PRIMARYHHFISH 0.0326858 0.0731650 0.447
## T145:PRIMARYHHFISH 0.0351151 0.0743432 0.472
## T152:PRIMARYHHFISH 0.0303029 0.0869223 0.349
## T121:PRIMARYHHGARD -0.0300917 0.0157505 -1.911
## T123:PRIMARYHHGARD -0.0244437 0.0160580 -1.522
## T131:PRIMARYHHGARD -0.0328620 0.0217274 -1.512
## T132:PRIMARYHHGARD -0.0333316 0.0157589 -2.115
## T137:PRIMARYHHGARD -0.0289047 0.0158825 -1.820
## T141:PRIMARYHHGARD -0.0306987 0.0158854 -1.933
## T142:PRIMARYHHGARD -0.0293501 0.0155936 -1.882
## T143:PRIMARYHHGARD -0.0421373 0.0156022 -2.701
## T145:PRIMARYHHGARD -0.0323557 0.0155970 -2.074
## T152:PRIMARYHHGARD -0.0309585 0.0156039 -1.984
## T155:PRIMARYHHGARD -0.0364870 0.0166526 -2.191
## T121:PRIMARYHHLVST 0.0172377 0.0373374 0.462
## T123:PRIMARYHHLVST 0.0448901 0.0376230 1.193
## T131:PRIMARYHHLVST -0.0354855 0.0509504 -0.696
## T132:PRIMARYHHLVST 0.0102568 0.0372649 0.275
## T137:PRIMARYHHLVST 0.0244022 0.0378063 0.645
## T141:PRIMARYHHLVST 0.0178229 0.0370488 0.481
## T142:PRIMARYHHLVST 0.0215238 0.0373147 0.577
## T143:PRIMARYHHLVST -0.0044711 0.0366397 -0.122
## T145:PRIMARYHHLVST 0.0114797 0.0365627 0.314
## T152:PRIMARYHHLVST 0.0133457 0.0368259 0.362
## T155:PRIMARYHHLVST -0.0077738 0.0415474 -0.187
## T121:PRIMARYHHOTHR -0.0449265 0.0091304 -4.921
## T123:PRIMARYHHOTHR -0.0352599 0.0093919 -3.754
## T131:PRIMARYHHOTHR -0.0250123 0.0125662 -1.990
## T132:PRIMARYHHOTHR -0.0427954 0.0089565 -4.778
## T137:PRIMARYHHOTHR -0.0401672 0.0088472 -4.540
## T141:PRIMARYHHOTHR -0.0358910 0.0087883 -4.084
## T142:PRIMARYHHOTHR -0.0478567 0.0089024 -5.376
## T143:PRIMARYHHOTHR -0.0442005 0.0086756 -5.095
## T145:PRIMARYHHOTHR -0.0368456 0.0087355 -4.218
## T152:PRIMARYHHOTHR -0.0489143 0.0087165 -5.612
## T155:PRIMARYHHOTHR -0.0273605 0.0111151 -2.462
## T121:PRIMARYHHRETIRE 0.0085013 0.0076208 1.116
## T123:PRIMARYHHRETIRE -0.0096738 0.0087065 -1.111
## T131:PRIMARYHHRETIRE -0.0226773 0.0101822 -2.227
## T132:PRIMARYHHRETIRE -0.0059007 0.0077780 -0.759
## T137:PRIMARYHHRETIRE 0.0035008 0.0076273 0.459
## T141:PRIMARYHHRETIRE 0.0068253 0.0079813 0.855
## T142:PRIMARYHHRETIRE -0.0113179 0.0077892 -1.453
## T143:PRIMARYHHRETIRE 0.0031822 0.0078853 0.404
## T145:PRIMARYHHRETIRE -0.0047902 0.0077104 -0.621
## T152:PRIMARYHHRETIRE 0.0002537 0.0079037 0.032
## T155:PRIMARYHHRETIRE 0.0113551 0.0097214 1.168
## T121:PRIMARYHHSUB -0.0112021 0.0087526 -1.280
## T123:PRIMARYHHSUB -0.0229826 0.0091654 -2.508
## T131:PRIMARYHHSUB -0.0059112 0.0137822 -0.429
## T132:PRIMARYHHSUB -0.0148314 0.0088362 -1.678
## T137:PRIMARYHHSUB -0.0152182 0.0085786 -1.774
## T141:PRIMARYHHSUB -0.0150387 0.0089959 -1.672
## T142:PRIMARYHHSUB -0.0097392 0.0088196 -1.104
## T143:PRIMARYHHSUB -0.0238762 0.0087210 -2.738
## T145:PRIMARYHHSUB -0.0182125 0.0088387 -2.061
## T152:PRIMARYHHSUB -0.0216936 0.0088343 -2.456
## T155:PRIMARYHHSUB -0.0130575 0.0136407 -0.957
##
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
qqmath(resid(m2))

plot(m2,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Logged Model Residuals")

It looks like the log transform improved the residual plots a bit. However, is this because the plot is zoomed out so much, or is it actually making a difference? Let’s remove the outliers, which are causing the zoom out, and reevaluate the effect of the log transform.
# fit model w/ log transform w/o outliers
m3 = lmer(log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY +
CYEAR*URBAN + PRIMARY*CYEAR + URBAN*PRIMARY
+ CYEAR*T1 + URBAN*T1 + T1*PRIMARY
+ (1 | HHID), data=df_trimmed)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(THOUSANDS_SHIFTED) ~ CYEAR + URBAN + T1 + PRIMARY + CYEAR *
## URBAN + PRIMARY * CYEAR + URBAN * PRIMARY + CYEAR * T1 +
## URBAN * T1 + T1 * PRIMARY + (1 | HHID)
## Data: df_trimmed
##
## REML criterion at convergence: 82721.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.5981 -0.5149 0.0153 0.5331 6.2093
##
## Random effects:
## Groups Name Variance Std.Dev.
## HHID (Intercept) 0.1116 0.3341
## Residual 0.3194 0.5651
## Number of obs: 43583, groups: HHID, 9623
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.5618304 0.2581509 6.050
## CYEAR 0.0430227 0.0098913 4.350
## URBAN1 0.3160395 0.0798599 3.957
## T121 0.0146482 0.2592921 0.056
## T123 -0.7170148 0.2619674 -2.737
## T131 0.2014195 0.3661220 0.550
## T132 0.1329570 0.2593712 0.513
## T137 -0.0269323 0.2592029 -0.104
## T141 -0.1838794 0.2592722 -0.709
## T142 -0.1753367 0.2592573 -0.676
## T143 0.0678627 0.2590604 0.262
## T145 -0.0133963 0.2588767 -0.052
## T152 -0.1956473 0.2589818 -0.755
## T155 -0.0024103 0.3552454 -0.007
## PRIMARYHHFARM -0.1384962 0.1855914 -0.746
## PRIMARYHHFISH -0.2240370 0.6854126 -0.327
## PRIMARYHHGARD -0.1492003 0.1436812 -1.038
## PRIMARYHHLVST -0.8132208 0.3327167 -2.444
## PRIMARYHHOTHR 0.1795430 0.0821877 2.185
## PRIMARYHHRETIRE -0.1162181 0.0755399 -1.539
## PRIMARYHHSUB 0.0042437 0.0795875 0.053
## CYEAR:URBAN1 0.0035260 0.0011303 3.120
## CYEAR:PRIMARYHHFARM -0.0063348 0.0013276 -4.772
## CYEAR:PRIMARYHHFISH 0.0033799 0.0072426 0.467
## CYEAR:PRIMARYHHGARD 0.0089151 0.0013218 6.745
## CYEAR:PRIMARYHHLVST 0.0095225 0.0028547 3.336
## CYEAR:PRIMARYHHOTHR -0.0053007 0.0014460 -3.666
## CYEAR:PRIMARYHHRETIRE 0.0166843 0.0015656 10.657
## CYEAR:PRIMARYHHSUB 0.0004642 0.0016082 0.289
## URBAN1:PRIMARYHHFARM -0.0394113 0.0567223 -0.695
## URBAN1:PRIMARYHHFISH 0.1679606 0.2641293 0.636
## URBAN1:PRIMARYHHGARD -0.0259260 0.0397746 -0.652
## URBAN1:PRIMARYHHLVST 0.0652337 0.0798325 0.817
## URBAN1:PRIMARYHHOTHR -0.0169596 0.0224079 -0.757
## URBAN1:PRIMARYHHRETIRE 0.0504328 0.0270507 1.864
## URBAN1:PRIMARYHHSUB 0.1086176 0.0284339 3.820
## CYEAR:T121 0.0069811 0.0099227 0.704
## CYEAR:T123 0.0262604 0.0100198 2.621
## CYEAR:T131 0.0095234 0.0139345 0.683
## CYEAR:T132 0.0079582 0.0099176 0.802
## CYEAR:T137 -0.0019718 0.0099060 -0.199
## CYEAR:T141 -0.0109794 0.0099049 -1.108
## CYEAR:T142 0.0027835 0.0099058 0.281
## CYEAR:T143 -0.0116312 0.0099099 -1.174
## CYEAR:T145 -0.0160639 0.0099062 -1.622
## CYEAR:T152 -0.0036789 0.0099088 -0.371
## CYEAR:T155 -0.0228148 0.0138438 -1.648
## URBAN1:T121 -0.3438524 0.0821327 -4.187
## URBAN1:T123 0.2240949 0.0876493 2.557
## URBAN1:T131 -0.2653309 0.0991733 -2.675
## URBAN1:T132 -0.3924686 0.0822641 -4.771
## URBAN1:T137 -0.3556785 0.0813637 -4.371
## URBAN1:T141 -0.1330007 0.0832333 -1.598
## URBAN1:T142 -0.2787304 0.0831610 -3.352
## URBAN1:T143 -0.2763176 0.0816202 -3.385
## URBAN1:T145 -0.3506235 0.0825019 -4.250
## URBAN1:T152 -0.0183336 0.0829218 -0.221
## URBAN1:T155 -0.1033889 0.0909590 -1.137
## T121:PRIMARYHHFARM -0.1926021 0.1867068 -1.032
## T123:PRIMARYHHFARM 0.2249867 0.1876227 1.199
## T131:PRIMARYHHFARM -0.1688836 0.4902134 -0.345
## T132:PRIMARYHHFARM -0.1689360 0.1865117 -0.906
## T137:PRIMARYHHFARM -0.0277265 0.1863729 -0.149
## T141:PRIMARYHHFARM -0.0762178 0.1858284 -0.410
## T142:PRIMARYHHFARM -0.0282780 0.1864352 -0.152
## T143:PRIMARYHHFARM -0.1295494 0.1874575 -0.691
## T145:PRIMARYHHFARM -0.1111607 0.1861745 -0.597
## T152:PRIMARYHHFARM -0.1011704 0.1860285 -0.544
## T155:PRIMARYHHFARM 0.5160301 0.2172880 2.375
## T121:PRIMARYHHFISH 0.3568243 0.7492528 0.476
## T132:PRIMARYHHFISH 0.3194231 0.6909858 0.462
## T137:PRIMARYHHFISH 0.2834861 0.7345091 0.386
## T141:PRIMARYHHFISH 0.5274449 0.7599429 0.694
## T142:PRIMARYHHFISH 0.2415545 0.6709912 0.360
## T143:PRIMARYHHFISH 0.4538453 0.6794552 0.668
## T145:PRIMARYHHFISH 0.3367605 0.6878997 0.490
## T152:PRIMARYHHFISH 0.1305119 0.7885433 0.166
## T121:PRIMARYHHGARD -0.1621139 0.1452369 -1.116
## T123:PRIMARYHHGARD 0.0771567 0.1479006 0.522
## T131:PRIMARYHHGARD -0.2279779 0.2025880 -1.125
## T132:PRIMARYHHGARD -0.0845802 0.1450825 -0.583
## T137:PRIMARYHHGARD 0.1198030 0.1462495 0.819
## T141:PRIMARYHHGARD 0.0125943 0.1460937 0.086
## T142:PRIMARYHHGARD 0.0062095 0.1437221 0.043
## T143:PRIMARYHHGARD -0.0449206 0.1437907 -0.312
## T145:PRIMARYHHGARD -0.0809027 0.1437526 -0.563
## T152:PRIMARYHHGARD -0.0236003 0.1437133 -0.164
## T155:PRIMARYHHGARD 0.0757281 0.1540392 0.492
## T121:PRIMARYHHLVST 0.4768837 0.3370989 1.415
## T123:PRIMARYHHLVST 1.1860814 0.3400202 3.488
## T131:PRIMARYHHLVST 0.1853870 0.4615335 0.402
## T132:PRIMARYHHLVST 0.5719443 0.3365163 1.700
## T137:PRIMARYHHLVST 0.8743785 0.3409695 2.564
## T141:PRIMARYHHLVST 0.7971393 0.3345954 2.382
## T142:PRIMARYHHLVST 0.7258149 0.3368771 2.155
## T143:PRIMARYHHLVST 0.3801149 0.3312613 1.147
## T145:PRIMARYHHLVST 0.5781784 0.3306270 1.749
## T152:PRIMARYHHLVST 0.5408523 0.3327848 1.625
## T155:PRIMARYHHLVST 0.4866854 0.3779092 1.288
## T121:PRIMARYHHOTHR -0.3280062 0.0837051 -3.919
## T123:PRIMARYHHOTHR -0.1298177 0.0861693 -1.507
## T131:PRIMARYHHOTHR -0.1703986 0.1156399 -1.474
## T132:PRIMARYHHOTHR -0.1887592 0.0821778 -2.297
## T137:PRIMARYHHOTHR -0.2041672 0.0812045 -2.514
## T141:PRIMARYHHOTHR -0.2007368 0.0807209 -2.487
## T142:PRIMARYHHOTHR -0.3418783 0.0815589 -4.192
## T143:PRIMARYHHOTHR -0.2489674 0.0797648 -3.121
## T145:PRIMARYHHOTHR -0.1821113 0.0802630 -2.269
## T152:PRIMARYHHOTHR -0.3405880 0.0800679 -4.254
## T155:PRIMARYHHOTHR 0.0591478 0.1023612 0.578
## T121:PRIMARYHHRETIRE 0.0861324 0.0716166 1.203
## T123:PRIMARYHHRETIRE -0.0861180 0.0812796 -1.060
## T131:PRIMARYHHRETIRE -0.2966162 0.0952242 -3.115
## T132:PRIMARYHHRETIRE -0.0235444 0.0729320 -0.323
## T137:PRIMARYHHRETIRE 0.1376026 0.0713851 1.928
## T141:PRIMARYHHRETIRE 0.1580941 0.0747122 2.116
## T142:PRIMARYHHRETIRE -0.0721954 0.0725979 -0.994
## T143:PRIMARYHHRETIRE 0.1231290 0.0737136 1.670
## T145:PRIMARYHHRETIRE 0.0111629 0.0720137 0.155
## T152:PRIMARYHHRETIRE -0.0228738 0.0737762 -0.310
## T155:PRIMARYHHRETIRE 0.3507000 0.0915713 3.830
## T121:PRIMARYHHSUB -0.0354432 0.0802377 -0.442
## T123:PRIMARYHHSUB -0.1092376 0.0836217 -1.306
## T131:PRIMARYHHSUB -0.0610588 0.1266517 -0.482
## T132:PRIMARYHHSUB -0.0372753 0.0809144 -0.461
## T137:PRIMARYHHSUB -0.0350315 0.0784096 -0.447
## T141:PRIMARYHHSUB -0.0803380 0.0826573 -0.972
## T142:PRIMARYHHSUB 0.0334957 0.0804020 0.417
## T143:PRIMARYHHSUB -0.1176882 0.0797352 -1.476
## T145:PRIMARYHHSUB -0.1458065 0.0805663 -1.810
## T152:PRIMARYHHSUB -0.1596282 0.0808367 -1.975
## T155:PRIMARYHHSUB 0.0341838 0.1246815 0.274
##
## Correlation matrix not shown by default, as p = 132 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
qqmath(resid(m3))

plot(m3,resid(.,scaled=TRUE)~fitted(.),abline=0, xlab = "Fitted Values", ylab = "Residuals", main="Residuals of log model without outliers")

We see that indeed, the log transformation did not do much to help the residuals plot. We proceed without a log transform for sake of simplicity and for interpretability reasons.
Model Selection
primary is significant
# no PRIMARY
m1 = lmer(THOUSANDS ~ CYEAR + URBAN + T1 + (1 |HHID), data=df, REML=FALSE)
# PRIMARY
m2 = lmer(THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY +(1 |HHID), data=df, REML=FALSE)
anova(m1, m2)
## Data: df
## Models:
## m1: THOUSANDS ~ CYEAR + URBAN + T1 + (1 | HHID)
## m2: THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + (1 | HHID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m1 16 368977 369116 -184472 368945
## m2 23 368841 369041 -184398 368795 149.28 7 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
2 way interactions
variables_of_interest = c("CYEAR * URBAN",
"CYEAR * T1",
"CYEAR * PRIMARY",
"URBAN * T1",
"URBAN * PRIMARY",
"T1 * PRIMARY")
remaining_variables_of_interest = variables_of_interest
current_variables = c("CYEAR",
"URBAN",
"T1",
"PRIMARY",
"(1|HHID)")
current_formula = as.formula(paste("THOUSANDS ~ ",paste(current_variables, collapse="+"),sep = ""))
while(TRUE){
print("current baseline")
print(current_variables)
# get baseline
current_formula = as.formula(paste("THOUSANDS ~ ",paste(current_variables, collapse="+"),sep = ""))
baseline = lmer(current_formula, data=df, REML=FALSE)
# loop through remaining variables of interest and create a new model
models = c()
for (var in remaining_variables_of_interest){
curr_var_loop = c(current_variables, var)
current_formula = as.formula(paste("THOUSANDS~ ",paste(curr_var_loop, collapse="+"),sep = ""))
new_model = lmer(current_formula, data=df, REML=FALSE)
models = c(models, new_model)
}
# perform anova on baseline and a new model, record the resulting p-value
p_vals = c()
for (m in models){
anova_result = anova(baseline, m)
p_vals= c(p_vals, anova_result$`Pr(>Chisq)`[2])
}
# if the smallest p-value is greater than 0.05, stop because no new variable is significant
if (min(p_vals)>0.05){
print("no remaining significant variables of interest")
break
}
print(paste("selected", remaining_variables_of_interest[which.min(p_vals)], ", p-value of", min(p_vals) ))
# add most signficant variable to model
current_variables = c(current_variables, remaining_variables_of_interest[which.min(p_vals)])
# remove the most significant variable from search space
remaining_variables_of_interest = remaining_variables_of_interest[-which.min(p_vals)]
if (length(remaining_variables_of_interest)==0){
break
}
}
## [1] "current baseline"
## [1] "CYEAR" "URBAN" "T1" "PRIMARY" "(1|HHID)"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected CYEAR * T1 , p-value of 3.54917455359114e-104"
## [1] "current baseline"
## [1] "CYEAR" "URBAN" "T1" "PRIMARY" "(1|HHID)"
## [6] "CYEAR * T1"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected CYEAR * PRIMARY , p-value of 7.1108351681761e-64"
## [1] "current baseline"
## [1] "CYEAR" "URBAN" "T1" "PRIMARY"
## [5] "(1|HHID)" "CYEAR * T1" "CYEAR * PRIMARY"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected T1 * PRIMARY , p-value of 8.23281763048671e-18"
## [1] "current baseline"
## [1] "CYEAR" "URBAN" "T1" "PRIMARY"
## [5] "(1|HHID)" "CYEAR * T1" "CYEAR * PRIMARY" "T1 * PRIMARY"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected URBAN * T1 , p-value of 6.45576046737603e-17"
## [1] "current baseline"
## [1] "CYEAR" "URBAN" "T1" "PRIMARY"
## [5] "(1|HHID)" "CYEAR * T1" "CYEAR * PRIMARY" "T1 * PRIMARY"
## [9] "URBAN * T1"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected CYEAR * URBAN , p-value of 0.000373927288152989"
## [1] "current baseline"
## [1] "CYEAR" "URBAN" "T1" "PRIMARY"
## [5] "(1|HHID)" "CYEAR * T1" "CYEAR * PRIMARY" "T1 * PRIMARY"
## [9] "URBAN * T1" "CYEAR * URBAN"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "no remaining significant variables of interest"
two_way_model = as.formula(paste("THOUSANDS ~ ",paste(current_variables, collapse="+"),sep = ""))
two_way_model
## THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + (1 | HHID) + CYEAR *
## T1 + CYEAR * PRIMARY + T1 * PRIMARY + URBAN * T1 + CYEAR *
## URBAN
3 way interaction
# CYEAR, URBAN, T1, PRIMARY
variables_of_interest = c("CYEAR * URBAN * T1",
"CYEAR * URBAN * PRIMARY",
"CYEAR * T1 * PRIMARY",
"URBAN * CYEAR * PRIMARY")
remaining_variables_of_interest = variables_of_interest
current_formula = two_way_model
while(TRUE){
print("current baseline")
print(current_variables)
# get baseline
current_formula = as.formula(paste("THOUSANDS ~ ",paste(current_variables, collapse="+"),sep = ""))
baseline = lmer(current_formula, data=df, REML=FALSE)
# loop through remaining variables of interest and create a new model
models = c()
for (var in remaining_variables_of_interest){
curr_var_loop = c(current_variables, var)
current_formula = as.formula(paste("THOUSANDS ~ ",paste(curr_var_loop, collapse="+"),sep = ""))
new_model = lmer(current_formula, data=df, REML=FALSE)
models = c(models, new_model)
}
p_vals = c()
for (m in models){
anova_result = anova(baseline, m)
p_vals= c(p_vals, anova_result$`Pr(>Chisq)`[2])
}
if (min(p_vals)>0.05){
print("no remaining significant variables of interest")
break
}
print(paste("selected", remaining_variables_of_interest[which.min(p_vals)], ", p-value of", min(p_vals) ))
# add most signficant variable to model
current_variables = c(current_variables, remaining_variables_of_interest[which.min(p_vals)])
# remove the most significant variable from search space
remaining_variables_of_interest = remaining_variables_of_interest[-which.min(p_vals)]
if (length(remaining_variables_of_interest)==0){
break
}
}
## [1] "current baseline"
## [1] "CYEAR" "URBAN" "T1" "PRIMARY"
## [5] "(1|HHID)" "CYEAR * T1" "CYEAR * PRIMARY" "T1 * PRIMARY"
## [9] "URBAN * T1" "CYEAR * URBAN"
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## [1] "selected CYEAR * T1 * PRIMARY , p-value of 5.47163495513843e-20"
## [1] "current baseline"
## [1] "CYEAR" "URBAN" "T1"
## [4] "PRIMARY" "(1|HHID)" "CYEAR * T1"
## [7] "CYEAR * PRIMARY" "T1 * PRIMARY" "URBAN * T1"
## [10] "CYEAR * URBAN" "CYEAR * T1 * PRIMARY"
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## [1] "selected CYEAR * URBAN * T1 , p-value of 1.31805736103392e-11"
## [1] "current baseline"
## [1] "CYEAR" "URBAN" "T1"
## [4] "PRIMARY" "(1|HHID)" "CYEAR * T1"
## [7] "CYEAR * PRIMARY" "T1 * PRIMARY" "URBAN * T1"
## [10] "CYEAR * URBAN" "CYEAR * T1 * PRIMARY" "CYEAR * URBAN * T1"
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## [1] "no remaining significant variables of interest"
three_way_model = as.formula(paste("THOUSANDS ~ ",paste(current_variables, collapse="+"),sep = ""))
three_way_model
## THOUSANDS ~ CYEAR + URBAN + T1 + PRIMARY + (1 | HHID) + CYEAR *
## T1 + CYEAR * PRIMARY + T1 * PRIMARY + URBAN * T1 + CYEAR *
## URBAN + CYEAR * T1 * PRIMARY + CYEAR * URBAN * T1
four way model
m1 = lmer(THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR*URBAN + CYEAR*PRIMARY
+ CYEAR*T1 + URBAN*T1 + CYEAR * T1 * PRIMARY + CYEAR * URBAN * T1
+ (1|HHID), REML = FALSE, data = df)
## fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
m2 = lmer(THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR*URBAN + CYEAR*PRIMARY
+ CYEAR*T1 + URBAN*T1 + CYEAR * T1 * PRIMARY + CYEAR * URBAN * T1
+ CYEAR * URBAN * T1 * PRIMARY
+ (1|HHID), REML = FALSE, data = df)
## fixed-effect model matrix is rank deficient so dropping 52 columns / coefficients
anova(m1, m2)
## Data: df
## Models:
## m1: THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR * URBAN + CYEAR *
## m1: PRIMARY + CYEAR * T1 + URBAN * T1 + CYEAR * T1 * PRIMARY +
## m1: CYEAR * URBAN * T1 + (1 | HHID)
## m2: THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR * URBAN + CYEAR *
## m2: PRIMARY + CYEAR * T1 + URBAN * T1 + CYEAR * T1 * PRIMARY +
## m2: CYEAR * URBAN * T1 + CYEAR * URBAN * T1 * PRIMARY + (1 |
## m2: HHID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m1 210 367724 369547 -183652 367304
## m2 334 367667 370568 -183499 366999 304.52 124 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
four_way_model = as.formula("THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR*URBAN + CYEAR*PRIMARY + CYEAR*T1 + URBAN*T1 + CYEAR * T1 * PRIMARY + CYEAR * URBAN * T1 + CYEAR * URBAN * T1 * PRIMARY + (1|HHID)")
four_way_model
## THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR * URBAN + CYEAR *
## PRIMARY + CYEAR * T1 + URBAN * T1 + CYEAR * T1 * PRIMARY +
## CYEAR * URBAN * T1 + CYEAR * URBAN * T1 * PRIMARY + (1 |
## HHID)
final model
final_model = lmer(THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR*URBAN + CYEAR*PRIMARY
+ CYEAR*T1 + URBAN*T1
+ (1|HHID), REML = FALSE, data = df)
summary(final_model)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR * URBAN + CYEAR *
## PRIMARY + CYEAR * T1 + URBAN * T1 + (1 | HHID)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 367974.6 368434.9 -183934.3 367868.6 43618
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -13.898 -0.286 -0.049 0.164 46.163
##
## Random effects:
## Groups Name Variance Std.Dev.
## HHID (Intercept) 141.5 11.90
## Residual 201.0 14.18
## Number of obs: 43671, groups: HHID, 9628
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -47.71427 6.11412 -7.804
## URBAN1 9.62540 1.94467 4.950
## CYEAR 2.74769 0.24469 11.229
## T121 47.87037 6.14575 7.789
## T123 39.40122 6.18778 6.368
## T131 19.85373 8.60380 2.308
## T132 48.29414 6.14752 7.856
## T137 49.57922 6.14380 8.070
## T141 48.05763 6.14415 7.822
## T142 48.60362 6.14609 7.908
## T143 49.28215 6.14622 8.018
## T145 49.23402 6.14583 8.011
## T152 47.12451 6.14404 7.670
## T155 42.61613 8.51865 5.003
## PRIMARYHHFARM 1.77776 0.49518 3.590
## PRIMARYHHFISH 1.54722 2.21080 0.700
## PRIMARYHHGARD 0.82057 0.54850 1.496
## PRIMARYHHLVST 1.11212 0.88774 1.253
## PRIMARYHHOTHR -0.89804 0.60974 -1.473
## PRIMARYHHRETIRE -2.18280 0.71044 -3.072
## PRIMARYHHSUB 0.66017 0.54497 1.211
## URBAN1:CYEAR 0.11092 0.02641 4.200
## CYEAR:PRIMARYHHFARM -0.35200 0.03218 -10.938
## CYEAR:PRIMARYHHFISH -0.10889 0.16054 -0.678
## CYEAR:PRIMARYHHGARD -0.13247 0.03219 -4.116
## CYEAR:PRIMARYHHLVST -0.10558 0.06791 -1.555
## CYEAR:PRIMARYHHOTHR -0.05971 0.03541 -1.686
## CYEAR:PRIMARYHHRETIRE 0.14120 0.03804 3.712
## CYEAR:PRIMARYHHSUB -0.13978 0.03726 -3.752
## CYEAR:T121 -1.86217 0.24607 -7.568
## CYEAR:T123 -1.55856 0.24872 -6.266
## CYEAR:T131 -0.44391 0.34442 -1.289
## CYEAR:T132 -1.80510 0.24598 -7.338
## CYEAR:T137 -2.05459 0.24586 -8.357
## CYEAR:T141 -2.22682 0.24579 -9.060
## CYEAR:T142 -1.99293 0.24559 -8.115
## CYEAR:T143 -2.14820 0.24572 -8.742
## CYEAR:T145 -2.30401 0.24567 -9.378
## CYEAR:T152 -2.13290 0.24567 -8.682
## CYEAR:T155 -1.99448 0.34542 -5.774
## URBAN1:T121 -9.27064 2.10199 -4.410
## URBAN1:T123 -3.67630 2.13020 -1.726
## URBAN1:T131 -12.06342 2.62203 -4.601
## URBAN1:T132 -9.60730 2.09039 -4.596
## URBAN1:T137 -9.78458 2.09554 -4.669
## URBAN1:T141 -7.25441 2.09974 -3.455
## URBAN1:T142 -10.94066 2.14117 -5.110
## URBAN1:T143 -9.57577 2.11082 -4.537
## URBAN1:T145 -8.70572 2.09215 -4.161
## URBAN1:T152 -5.44761 2.10445 -2.589
## URBAN1:T155 -5.38565 2.31150 -2.330
##
## Correlation matrix not shown by default, as p = 51 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
model diagnostics
qqmath(resid(final_model))

plot(final_model,resid(.,scaled=TRUE)~fitted(.),abline=0, main="Final Model Residuals")

vif(final_model)
## GVIF Df GVIF^(1/(2*Df))
## URBAN 4.290689e+01 1 6.550335
## CYEAR 6.981445e+02 1 26.422424
## T1 5.303258e+08 11 2.492127
## PRIMARY 5.295697e+03 7 1.844992
## URBAN:CYEAR 4.458281e+00 1 2.111464
## CYEAR:PRIMARY 1.026724e+04 7 1.934338
## CYEAR:T1 3.844677e+08 11 2.455958
## URBAN:T1 2.610188e+04 11 1.587661
cross validation
HHIDs = unique(df$HHID)
# do random sampling
n_times = 10
# 80 - 20 test split
test_size = 5
our_MSEs = c()
two_way_MSEs = c()
three_way_MSEs = c()
four_way_MSEs = c()
set.seed(0)
for(i in 1:test_size){
HHIDs = sample(HHIDs)
# split into train and test folds
test_indices = seq(i, length(HHIDs), test_size)
train_fold_HHIDs = HHIDs[-test_indices]
test_fold_HHIDs = HHIDs[test_indices]
train_fold = df[df$HHID %in% train_fold_HHIDs,]
test_fold = df[df$HHID %in% test_fold_HHIDs,]
#create a new model on train_fold
our_model = lmer(THOUSANDS ~ URBAN + CYEAR + T1 + PRIMARY + CYEAR*URBAN + CYEAR*PRIMARY
+ CYEAR*T1 + URBAN*T1
+ (1|HHID), REML = FALSE, data = train_fold)
predictions = predict(our_model, test_fold, allow.new.levels=TRUE)
residuals = (predictions) - (test_fold$THOUSANDS)
MSE = mean(residuals^2)
our_MSEs = c(our_MSEs, MSE)
two_way = lmer(two_way_model, data=train_fold, REML=FALSE)
predictions = predict(two_way, test_fold, allow.new.levels=TRUE)
residuals = (predictions) - (test_fold$THOUSANDS)
MSE = mean(residuals^2)
two_way_MSEs = c(two_way_MSEs, MSE)
three_way = lmer(three_way_model, data=train_fold, REML=FALSE)
predictions = predict(three_way, test_fold, allow.new.levels=TRUE)
residuals = (predictions) - (test_fold$THOUSANDS)
MSE = mean(residuals^2)
three_way_MSEs = c(three_way_MSEs, MSE)
four_way = lmer(four_way_model, data=train_fold, REML=FALSE)
predictions = predict(four_way, test_fold, allow.new.levels=TRUE)
residuals = (predictions) - (test_fold$THOUSANDS)
MSE = mean(residuals^2)
four_way_MSEs = c(four_way_MSEs, MSE)
}
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 10 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 55 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 10 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 57 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 10 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 55 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 9 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 58 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 10 columns / coefficients
## fixed-effect model matrix is rank deficient so dropping 55 columns / coefficients
mean(our_MSEs)
## [1] 240.0926
sd(our_MSEs)
## [1] 21.19593
mean(two_way_MSEs)
## [1] 239.9553
sd(two_way_MSEs)
## [1] 21.07868
mean(three_way_MSEs)
## [1] 240.6549
sd(three_way_MSEs)
## [1] 20.79697
mean(four_way_MSEs)
## [1] 244.3279
sd(four_way_MSEs)
## [1] 22.25931
Income Inequality
library(ineq)
library(tidyverse)
#gini index overtime
gini.years = df %>%
dplyr::select(HHID,HHINCPC_CPI,WAVE) %>%
arrange(HHINCPC_CPI) %>%
transform(HHINCPC_CPI = ifelse(HHINCPC_CPI < 0, 0, HHINCPC_CPI)) %>%
group_by(WAVE) %>%
summarise(index = ineq(HHINCPC_CPI,type = c("Gini")))
p1 = ggplot(gini.years,aes(x = factor(WAVE),y = index)) +
geom_line() +
geom_point()+
geom_text(aes(label = round(index, 2)),hjust = -0.2,vjust = 0.5) +
ggtitle("Gini Index Over Year in China") +
xlab("Year") + ylab("Gini Index") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
years = c(1989,1991,1993,1997,2000,2004,2006,2009,2011,2015)
cols <- c("#ffe6e6","#ffcccc","#ff8080","#ff8080","#ff1a1a","#ff1a1a","#b30000","#660000","#330000")
plot(Lc(df[df$WAVE ==1989,]$HHINCPC_CPI),lwd=2,col = "#ffe6e6")
for (i in 2:length(years)){
lines(Lc(df[df$WAVE==years[i],]$HHINCPC_CPI),col=cols[i],lwd=2)
}

#png("gini_year.png")
#print(p1)
#dev.off()
#gini index overtime and provinces
df = df %>%
mutate(city = case_when(T1 == 11 ~ "Beijing",
T1 == 21 ~ "LiaoNing",
T1 == 23 ~ "Heilongjiang",
T1 == 31 ~ "Shanghai",
T1 == 32 ~ "Jiangsu",
T1 == 33 ~ "Zhejiang",
T1 == 37 ~ "Shandong",
T1 == 41 ~ "Henan",
T1 == 42 ~ "Hubei",
T1 == 43 ~ "Hunan",
T1 == 45 ~ "Guangxi",
T1 == 52 ~ "Guizhou",
T1 == 53 ~ "Yunnan",
T1 == 55 ~ "Chongqing",
T1 == 61 ~ "Shanxi"))
gini.years$city = c("China","China","China","China","China","China","China","China","China","China")
gini.years.province = df %>%
dplyr::select(HHID,HHINCPC_CPI,T1,WAVE,city) %>%
#arrange(HHINCPC_CPI) %>%
transform(HHINCPC_CPI = ifelse(HHINCPC_CPI < 0, 0, HHINCPC_CPI)) %>%
group_by(city,WAVE) %>%
summarise(index = ineq(HHINCPC_CPI,type = c("Gini")))
p2 = ggplot(gini.years.province,aes(x = factor(WAVE),y = index,group = city,color = city)) +
geom_line() +
geom_point()+
geom_line(data = gini.years,aes(x = factor(WAVE),y = index),color = "black") +
ggtitle("Gini Index Over Year by Province") +
xlab("Year") + ylab("Gini Index") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
#png("gini_year_province.png")
#print(p2)
#dev.off()
df = df %>%
mutate(primary_income = case_when(PRIMARY == "HHBUS"~ HHBUS,
PRIMARY == "HHFARM"~ HHFARM,
PRIMARY == "HHFISH"~ HHFISH,
PRIMARY == "HHGARD"~ HHGARD,
PRIMARY == "HHLVST"~ HHLVST,
PRIMARY == "HHNRWAGE"~ HHNRWAGE,
PRIMARY == "HHOTHR"~ HHOTHR,
PRIMARY == "HHRETIRE"~ HHRETIRE,
PRIMARY == "HHSUB"~ HHSUB))
gini.years.primary = df %>%
dplyr::select(HHID,primary_income,T1,WAVE,city,PRIMARY) %>%
transform(primary_income = ifelse(primary_income < 0, 0, primary_income)) %>%
group_by(PRIMARY,WAVE) %>%
summarise(index = ineq(primary_income,type = c("Gini")))
ggplot(gini.years.primary,aes(x = WAVE,y = index,group = PRIMARY,color = PRIMARY)) +
geom_line()+
#geom_point()+
ggtitle("Gini Index Over Year by Primary Outcome") +
xlab("Year") + ylab("Gini Index") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))

ggplot(gini.years.primary,aes(x = WAVE,y = index)) +
geom_line() +
geom_point()+
#geom_text(aes(label = round(index, 3),hjust = -0.3,vjust = 0.5)) +
ggtitle("Gini Index Over Year") +
xlab("Year") + ylab("Gini Index") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))+
facet_wrap(~PRIMARY)

gini.years.urban = df %>%
dplyr::select(HHID,primary_income,T1,WAVE,URBAN) %>%
transform(primary_income = ifelse(primary_income < 0, 0, primary_income)) %>%
group_by(URBAN,WAVE) %>%
summarise(index = ineq(primary_income,type = c("Gini")))
ggplot(gini.years.urban,aes(x = WAVE,y = index,group = URBAN,color = URBAN)) +
geom_line()+
#geom_point()+
ggtitle("Gini Index Over Year by Urban/Rural") +
xlab("Year") + ylab("Gini Index") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))

ratio.years = df %>%
dplyr::select(HHID,HHINCPC_CPI,WAVE,city) %>%
transform(HHINCPC_CPI = ifelse(HHINCPC_CPI < 0, 0, HHINCPC_CPI)) %>%
group_by(WAVE) %>%
mutate(top10 = ifelse(HHINCPC_CPI >= quantile(HHINCPC_CPI,0.9),HHINCPC_CPI,0),
bottom10 = ifelse(HHINCPC_CPI <= quantile(HHINCPC_CPI,0.1),HHINCPC_CPI,0)) %>%
summarise(ratio = sum(bottom10)/sum(top10),sumb = sum(bottom10),sumt =sum(top10))
ggplot(ratio.years,aes(x = WAVE,y = ratio)) +
geom_line() +
geom_point()+
geom_text(aes(label = round(ratio, 3),hjust = -0.3,vjust = 0.5)) +
ggtitle("Gini Index Over Year") +
xlab("Year") + ylab("Gini Index") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))

ratio.years.province = df %>%
dplyr::select(HHID,HHINCPC_CPI,WAVE,city) %>%
transform(HHINCPC_CPI = ifelse(HHINCPC_CPI < 0, 0, HHINCPC_CPI)) %>%
group_by(WAVE,city) %>%
mutate(top10 = ifelse(HHINCPC_CPI >= quantile(HHINCPC_CPI,0.9),HHINCPC_CPI,0),
bottom10 = ifelse(HHINCPC_CPI <= quantile(HHINCPC_CPI,0.1),HHINCPC_CPI,0)) %>%
summarise(ratio = sum(bottom10)/sum(top10),sumb = sum(bottom10),sumt =sum(top10))
ggplot(ratio.years.province,aes(x = WAVE,y = ratio,group = city,color = city)) +
geom_line()+
geom_point()+
ggtitle("Gini Index Over Year by Province") +
xlab("Year") + ylab("Gini Index") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))

ggplot(ratio.years.province,aes(x = WAVE,y = ratio)) +
geom_line() +
geom_point()+
#geom_text(aes(label = round(index, 3),hjust = -0.3,vjust = 0.5)) +
ggtitle("Gini Index Over Year") +
xlab("Year") + ylab("Gini Index") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))+
facet_wrap(~city)

ratio.years.primary = df %>%
dplyr::select(HHID,HHINCPC_CPI,WAVE,PRIMARY) %>%
transform(HHINCPC_CPI = ifelse(HHINCPC_CPI < 0, 0, HHINCPC_CPI)) %>%
group_by(WAVE,PRIMARY) %>%
mutate(top10 = ifelse(HHINCPC_CPI >= quantile(HHINCPC_CPI,0.9),HHINCPC_CPI,0),
bottom10 = ifelse(HHINCPC_CPI <= quantile(HHINCPC_CPI,0.1),HHINCPC_CPI,0)) %>%
summarise(ratio = sum(bottom10)/sum(top10),sumb = sum(bottom10),sumt =sum(top10))
ggplot(ratio.years.primary,aes(x = WAVE,y = ratio,group = PRIMARY,color = PRIMARY)) +
geom_line()+
geom_point()+
ggtitle("Gini Index Over Year by Province") +
xlab("Year") + ylab("Gini Index") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))

ggplot(ratio.years.primary,aes(x = WAVE,y = ratio)) +
geom_line() +
geom_point()+
#geom_text(aes(label = round(index, 3),hjust = -0.3,vjust = 0.5)) +
ggtitle("Gini Index Over Year") +
xlab("Year") + ylab("Gini Index") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))+
facet_wrap(~PRIMARY)

Beta coefficient test
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
coefs <- names(fixef(final_model))
#year
coeftest_year <- linearHypothesis(final_model, "CYEAR=0")
#urban/rural
coeftest_urban <- linearHypothesis(final_model, "URBAN1=0")
#province
coeftest_province <- linearHypothesis(final_model, coefs[4:14])
#primary
coeftest_primary <- linearHypothesis(final_model, coefs[15:21])
#year:primary
coeftest_yearprimary <- linearHypothesis(final_model, coefs[22:28])
#year:urban
coeftest_yearurban <- linearHypothesis(final_model, coefs[29])
#year:province
coeftest_yearprovince <- linearHypothesis(final_model, coefs[30:40])
#urban:province
coeftest_urbanprovince <- linearHypothesis(final_model, coefs[41:51])
coeftest <- na.omit(rbind(coeftest_year, coeftest_urban, coeftest_province, coeftest_primary, coeftest_yearprimary, coeftest_yearurban, coeftest_yearprovince, coeftest_urbanprovince))
var <- c("Year","Urban", "Province", "Primary Income Source", "Year:Primary Source", "Year:Urban", "Year:Province", "Urban:Province")
coeftest <- cbind(var, coeftest)
kable(coeftest, col.names = c("Variable", "Degrees of Freedom", "Chi-squared", "p-value"), caption="Hypothesis Testing of Urban Coefficients",align="c")
Hypothesis Testing of Urban Coefficients
| 2 |
Year |
1 |
126.09296 |
0.0000000 |
| 21 |
Urban |
1 |
24.49895 |
0.0000007 |
| 22 |
Province |
11 |
166.26294 |
0.0000000 |
| 23 |
Primary Income Source |
7 |
47.88564 |
0.0000000 |
| 24 |
Year:Primary Source |
7 |
311.83626 |
0.0000000 |
| 25 |
Year:Urban |
1 |
14.07428 |
0.0001757 |
| 26 |
Year:Province |
11 |
486.58764 |
0.0000000 |
| 27 |
Urban:Province |
11 |
69.94011 |
0.0000000 |
png("beta-test.png")
p <- tableGrob(coeftest)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#Variation in household income by province
yrs <- c(0,2,4,8,11,15,17,20,22,26)
xvar <- c("1989", "1991", "1993", "1997", "2000", "2004", "2006", "2009", "2011", "2015")
estimate <- as.data.frame(summary(final_model)$coefficients)
estimate <- estimate %>% dplyr::select(Estimate)
for (i in yrs) {
prov21 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T121",]+yrs*estimate["CYEAR:T121",]
df7 <- data.frame(prov21)
}
for (i in yrs) {
prov23 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T123",]+yrs*estimate["CYEAR:T123",]
df8 <- data.frame(prov23)
}
for (i in yrs) {
prov31 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T131",]+yrs*estimate["CYEAR:T131",]
df9 <- data.frame(prov31)
}
for (i in yrs) {
prov32 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T132",]+yrs*estimate["CYEAR:T132",]
df10 <- data.frame(prov32)
}
for (i in yrs) {
prov37 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T137",]+yrs*estimate["CYEAR:T137",]
df11 <- data.frame(prov37)
}
for (i in yrs) {
prov41 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T141",]+yrs*estimate["CYEAR:T141",]
df12 <- data.frame(prov41)
}
for (i in yrs) {
prov42 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T142",]+yrs*estimate["CYEAR:T142",]
df13 <- data.frame(prov42)
}
for (i in yrs) {
prov43 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T143",]+yrs*estimate["CYEAR:T143",]
df14 <- data.frame(prov43)
}
for (i in yrs) {
prov45 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T145",]+yrs*estimate["CYEAR:T145",]
df15 <- data.frame(prov45)
}
for (i in yrs) {
prov52 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T152",]+yrs*estimate["CYEAR:T152",]
df16 <- data.frame(prov52)
}
for (i in yrs) {
prov55 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["T155",]+yrs*estimate["CYEAR:T155",]
df17 <- data.frame(prov55)
}
for (i in yrs) {
prov11 <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]
df18 <- data.frame(prov11)
}
prov.data <- cbind(xvar, df18, df7, df8, df9, df10, df11, df12, df13, df14, df15, df16, df17)
prov.data$xvar <- as.numeric(as.character(prov.data$xvar))
mdf2 <- melt(prov.data, id.vars="xvar")
plot2 <- ggplot(mdf2, aes(x=xvar, y=value, group=variable, color=variable)) +
geom_line() +
geom_point() +
scale_color_discrete(name="Province",
labels=c("Beijing", "Liaoning", "Heilongjiang", "Shanghai", "Jiangsu", "Shandong", "Henan", "Hubei", "Hunan", "Guangxi", "Guizhou", "Chongqing")) +
labs(title="Changes in Household Income per Capita Over Time \nby Province",
x="Year",
y="Beta coefficients") +
theme(plot.title = element_text(size=20))
#Variation in household income by urban/rural status
for (i in yrs) {
urb <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["URBAN1",]+estimate["CYEAR:URBAN1",]
df19 <- data.frame(urb)
}
for (i in yrs) {
rur <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]
df20 <- data.frame(rur)
}
urb.data <- cbind(xvar, df19, df20)
urb.data$xvar <- as.numeric(as.character(urb.data$xvar))
mdf3 <- melt(urb.data, id.vars="xvar")
plot3 <- ggplot(mdf3, aes(x=xvar, y=value, group=variable, color=variable)) +
geom_line() +
geom_point() +
scale_color_discrete(name="Status",
labels=c("Urban", "Rural")) +
labs(title="Changes in Household Income per Capita Over Time \nby Urban/Rural Status",
x="Year",
y="Beta coefficients") +
theme(plot.title = element_text(size=20))
#Variation in household income by income source
for (i in yrs) {
bus <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]
d <- data.frame(bus)
}
for (i in yrs) {
farm <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHFARM",]+yrs*estimate["CYEAR:PRIMARYHHFARM",]
df0 <- data.frame(farm)
}
for (i in yrs) {
fish <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHFISH",]+yrs*estimate["CYEAR:PRIMARYHHFISH",]
df1 <- data.frame(fish)
}
for (i in yrs) {
gard <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHGARD",]+yrs*estimate["CYEAR:PRIMARYHHGARD",]
df2 <- data.frame(gard)
}
for (i in yrs) {
lvst <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHLVST",]+yrs*estimate["CYEAR:PRIMARYHHLVST",]
df3 <- data.frame(lvst)
}
for (i in yrs) {
othr <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHOTHR",]+yrs*estimate["CYEAR:PRIMARYHHOTHR",]
df4 <- data.frame(othr)
}
for (i in yrs) {
retire <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHRETIRE",]+yrs*estimate["CYEAR:PRIMARYHHRETIRE",]
df5 <- data.frame(retire)
}
for (i in yrs) {
sub <- estimate["(Intercept)",]+yrs*estimate["CYEAR",]+estimate["PRIMARYHHSUB",]+yrs*estimate["CYEAR:PRIMARYHHSUB",]
df6 <- data.frame(sub)
}
source.data <- cbind(xvar, d, df0, df1, df2, df3, df4, df5, df6)
source.data$xvar <- as.numeric(as.character(source.data$xvar))
mdf1 <- melt(source.data, id.vars="xvar")
plot1 <- ggplot(mdf1, aes(x=xvar, y=value, group=variable, color=variable)) +
geom_line(aes(color=variable)) +
geom_point() +
scale_color_discrete(name="Income Source",
labels=c("Business","Farming", "Fishing", "Gardening", "Livestock", "Others", "Retirement", "Subsidies")) +
labs(title="Changes in Household Income per Capita Over Time \nby Primary Income Source",
x="Year",
y="Beta coefficients") +
theme(plot.title = element_text(size=20))